[llvm] [Offload] Implement the remaining initial Offload API (PR #122106)

Callum Fare via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 15 07:57:29 PDT 2025


https://github.com/callumfare updated https://github.com/llvm/llvm-project/pull/122106

>From 1d8a6829be9224c61ebc025c288d2bf16ed3b78e Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Wed, 11 Dec 2024 12:08:44 +0000
Subject: [PATCH 1/9] [Offload] Implement remaining API for basic memory
 operations and kernel exec

---
 offload/liboffload/API/Common.td              |  27 +-
 offload/liboffload/API/Device.td              |  24 +-
 offload/liboffload/API/Enqueue.td             |  61 ++
 offload/liboffload/API/Event.td               |  41 +
 offload/liboffload/API/Kernel.td              |  45 +
 offload/liboffload/API/Memory.td              |  48 +
 offload/liboffload/API/OffloadAPI.td          |   6 +
 offload/liboffload/API/Platform.td            |  12 +-
 offload/liboffload/API/Program.td             |  46 +
 offload/liboffload/API/Queue.td               |  54 ++
 offload/liboffload/API/README.md              |   6 +-
 offload/liboffload/include/OffloadImpl.hpp    |   1 +
 .../liboffload/include/generated/OffloadAPI.h | 750 +++++++++++++-
 .../include/generated/OffloadEntryPoints.inc  | 918 ++++++++++++++++--
 .../include/generated/OffloadFuncs.inc        |  36 +
 .../generated/OffloadImplFuncDecls.inc        |  52 +
 .../include/generated/OffloadPrint.hpp        | 400 ++++++--
 offload/liboffload/src/OffloadImpl.cpp        | 354 ++++++-
 offload/liboffload/src/OffloadLib.cpp         |   7 +-
 offload/tools/offload-tblgen/APIGen.cpp       |   9 +-
 .../tools/offload-tblgen/EntryPointGen.cpp    |  17 +-
 offload/tools/offload-tblgen/PrintGen.cpp     |  67 +-
 offload/tools/offload-tblgen/RecordTypes.hpp  |   2 +
 offload/unittests/OffloadAPI/CMakeLists.txt   |  23 +-
 .../OffloadAPI/common/Environment.cpp         |  36 +
 .../OffloadAPI/common/Environment.hpp         |   4 +
 .../unittests/OffloadAPI/common/Fixtures.hpp  |  74 +-
 .../OffloadAPI/device/olGetDevice.cpp         |   2 +-
 .../OffloadAPI/device/olGetDeviceCount.cpp    |   2 +-
 .../OffloadAPI/device/olGetDeviceInfo.cpp     |   4 +-
 .../OffloadAPI/device/olGetDeviceInfoSize.cpp |   4 +-
 .../OffloadAPI/device_code/CMakeLists.txt     |  68 ++
 .../unittests/OffloadAPI/device_code/bar.c    |   5 +
 .../unittests/OffloadAPI/device_code/foo.c    |   5 +
 .../enqueue/olEnqueueKernelLaunch.cpp         |  66 ++
 .../OffloadAPI/enqueue/olEnqueueMemcpy.cpp    |  71 ++
 .../OffloadAPI/kernel/olCreateKernel.cpp      |  31 +
 .../OffloadAPI/kernel/olReleaseKernel.cpp     |  22 +
 .../OffloadAPI/kernel/olRetainKernel.cpp      |  19 +
 .../OffloadAPI/memory/olMemAlloc.cpp          |  45 +
 .../unittests/OffloadAPI/memory/olMemFree.cpp |  47 +
 .../OffloadAPI/platform/olGetPlatform.cpp     |   2 +-
 .../platform/olGetPlatformCount.cpp           |   2 +-
 .../OffloadAPI/platform/olGetPlatformInfo.cpp |   2 +-
 .../platform/olGetPlatformInfoSize.cpp        |   2 +-
 .../OffloadAPI/platform/olPlatformInfo.hpp    |   1 +
 .../OffloadAPI/program/olCreateProgram.cpp    |  27 +
 .../OffloadAPI/program/olReleaseProgram.cpp   |  22 +
 .../OffloadAPI/program/olRetainProgram.cpp    |  21 +
 .../OffloadAPI/queue/olCreateQueue.cpp        |  28 +
 .../OffloadAPI/queue/olReleaseQueue.cpp       |  21 +
 .../OffloadAPI/queue/olRetainQueue.cpp        |  18 +
 .../OffloadAPI/queue/olWaitQueue.cpp          |  17 +
 53 files changed, 3424 insertions(+), 250 deletions(-)
 create mode 100644 offload/liboffload/API/Enqueue.td
 create mode 100644 offload/liboffload/API/Event.td
 create mode 100644 offload/liboffload/API/Kernel.td
 create mode 100644 offload/liboffload/API/Memory.td
 create mode 100644 offload/liboffload/API/Program.td
 create mode 100644 offload/liboffload/API/Queue.td
 create mode 100644 offload/unittests/OffloadAPI/device_code/CMakeLists.txt
 create mode 100644 offload/unittests/OffloadAPI/device_code/bar.c
 create mode 100644 offload/unittests/OffloadAPI/device_code/foo.c
 create mode 100644 offload/unittests/OffloadAPI/enqueue/olEnqueueKernelLaunch.cpp
 create mode 100644 offload/unittests/OffloadAPI/enqueue/olEnqueueMemcpy.cpp
 create mode 100644 offload/unittests/OffloadAPI/kernel/olCreateKernel.cpp
 create mode 100644 offload/unittests/OffloadAPI/kernel/olReleaseKernel.cpp
 create mode 100644 offload/unittests/OffloadAPI/kernel/olRetainKernel.cpp
 create mode 100644 offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
 create mode 100644 offload/unittests/OffloadAPI/memory/olMemFree.cpp
 create mode 100644 offload/unittests/OffloadAPI/program/olCreateProgram.cpp
 create mode 100644 offload/unittests/OffloadAPI/program/olReleaseProgram.cpp
 create mode 100644 offload/unittests/OffloadAPI/program/olRetainProgram.cpp
 create mode 100644 offload/unittests/OffloadAPI/queue/olCreateQueue.cpp
 create mode 100644 offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp
 create mode 100644 offload/unittests/OffloadAPI/queue/olRetainQueue.cpp
 create mode 100644 offload/unittests/OffloadAPI/queue/olWaitQueue.cpp

diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td
index 5b19d1d47129e..a0a2697e27e77 100644
--- a/offload/liboffload/API/Common.td
+++ b/offload/liboffload/API/Common.td
@@ -62,6 +62,26 @@ def : Handle {
   let desc = "Handle of context object";
 }
 
+def : Handle {
+  let name = "ol_queue_handle_t";
+  let desc = "Handle of queue object";
+}
+
+def : Handle {
+  let name = "ol_event_handle_t";
+  let desc = "Handle of event object";
+}
+
+def : Handle {
+  let name = "ol_program_handle_t";
+  let desc = "Handle of program object";
+}
+
+def : Handle {
+  let name = "ol_kernel_handle_t";
+  let desc = "Handle of kernel object";
+}
+
 def : Enum {
   let name = "ol_errc_t";
   let desc = "Defines Return/Error codes";
@@ -69,12 +89,11 @@ def : Enum {
     Etor<"SUCCESS", "Success">,
     Etor<"INVALID_VALUE", "Invalid Value">,
     Etor<"INVALID_PLATFORM", "Invalid platform">,
-    Etor<"DEVICE_NOT_FOUND", "Device not found">,
     Etor<"INVALID_DEVICE", "Invalid device">,
-    Etor<"DEVICE_LOST", "Device hung, reset, was removed, or driver update occurred">,
-    Etor<"UNINITIALIZED", "plugin is not initialized or specific entry-point is not implemented">,
+    Etor<"INVALID_QUEUE", "Invalid queue">,
+    Etor<"INVALID_EVENT", "Invalid event">,
+    Etor<"INVALID_KERNEL_NAME", "Named kernel not found in the program binary">,
     Etor<"OUT_OF_RESOURCES", "Out of resources">,
-    Etor<"UNSUPPORTED_VERSION", "generic error code for unsupported versions">,
     Etor<"UNSUPPORTED_FEATURE", "generic error code for unsupported features">,
     Etor<"INVALID_ARGUMENT", "generic error code for invalid arguments">,
     Etor<"INVALID_NULL_HANDLE", "handle argument is not valid">,
diff --git a/offload/liboffload/API/Device.td b/offload/liboffload/API/Device.td
index 30c0b71fe7b37..36fbdcfd05153 100644
--- a/offload/liboffload/API/Device.td
+++ b/offload/liboffload/API/Device.td
@@ -12,7 +12,7 @@
 
 def : Enum {
   let name = "ol_device_type_t";
-  let desc = "Supported device types";
+  let desc = "Supported device types.";
   let etors =[
     Etor<"DEFAULT", "The default device type as preferred by the runtime">,
     Etor<"ALL", "Devices of all types">,
@@ -23,7 +23,7 @@ def : Enum {
 
 def : Enum {
   let name = "ol_device_info_t";
-  let desc = "Supported device info";
+  let desc = "Supported device info.";
   let is_typed = 1;
   let etors =[
     TaggedEtor<"TYPE", "ol_device_type_t", "type of the device">,
@@ -36,7 +36,7 @@ def : Enum {
 
 def : Function {
   let name = "olGetDeviceCount";
-  let desc = "Retrieves the number of available devices within a platform";
+  let desc = "Retrieves the number of available devices within a platform.";
   let params = [
     Param<"ol_platform_handle_t", "Platform", "handle of the platform instance", PARAM_IN>,
     Param<"uint32_t*", "NumDevices", "pointer to the number of devices.", PARAM_OUT>
@@ -46,7 +46,7 @@ def : Function {
 
 def : Function {
   let name = "olGetDevice";
-  let desc = "Retrieves devices within a platform";
+  let desc = "Retrieves devices within a platform.";
   let details = [
     "Multiple calls to this function will return identical device handles, in the same order.",
   ];
@@ -66,7 +66,7 @@ def : Function {
 
 def : Function {
   let name = "olGetDeviceInfo";
-  let desc = "Queries the given property of the device";
+  let desc = "Queries the given property of the device.";
   let details = [];
   let params = [
     Param<"ol_device_handle_t", "Device", "handle of the device instance", PARAM_IN>,
@@ -90,7 +90,7 @@ def : Function {
 
 def : Function {
   let name = "olGetDeviceInfoSize";
-  let desc = "Returns the storage size of the given device query";
+  let desc = "Returns the storage size of the given device query.";
   let details = [];
   let params = [
     Param<"ol_device_handle_t", "Device", "handle of the device instance", PARAM_IN>,
@@ -104,3 +104,15 @@ def : Function {
     Return<"OL_ERRC_INVALID_DEVICE">
   ];
 }
+
+def : Function {
+  let name = "olGetHostDevice";
+  let desc = "Return the special host device used to represent the host in memory transfer operations.";
+  let details = [
+    "The host device does not support queues"
+  ];
+  let params = [
+    Param<"ol_device_handle_t*", "Device", "Output pointer for the device">
+  ]; // TODO: Take a platform?
+  let returns = [];
+}
diff --git a/offload/liboffload/API/Enqueue.td b/offload/liboffload/API/Enqueue.td
new file mode 100644
index 0000000000000..c6f1c5ddc4b51
--- /dev/null
+++ b/offload/liboffload/API/Enqueue.td
@@ -0,0 +1,61 @@
+//===-- Enqueue.td - Enqueue definitions for Offload -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to enqueueable operations
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+    let name = "olEnqueueMemcpy";
+    let desc = "Enqueue a memcpy operation.";
+    let details = [
+        "For host pointers, use the device returned by olGetHostDevice",
+        "At least one device must be a non-host device"
+    ];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+        Param<"void*", "DstPtr", "pointer to copy to", PARAM_IN>,
+        Param<"ol_device_handle_t", "DstDevice", "device that DstPtr belongs to", PARAM_IN>,
+        Param<"void*", "SrcPtr", "pointer to copy from", PARAM_IN>,
+        Param<"ol_device_handle_t", "SrcDevice", "device that SrcPtr belongs to", PARAM_IN>,
+        Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
+        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+    ];
+    let returns = [
+        Return<"OL_ERRC_INVALID_SIZE", ["`Size == 0`"]>
+    ];
+}
+
+def : Struct {
+    let name = "ol_kernel_launch_size_args_t";
+    let desc = "Size-related arguments for a kernel launch.";
+    let members = [
+        StructMember<"size_t", "Dimensions", "Number of work dimensions">,
+        StructMember<"size_t", "NumGroupsX", "Number of work groups on the X dimension">,
+        StructMember<"size_t", "NumGroupsY", "Number of work groups on the Y dimension">,
+        StructMember<"size_t", "NumGroupsZ", "Number of work groups on the Z dimension">,
+        StructMember<"size_t", "GroupSizeX", "Size of a work group on the X dimension.">,
+        StructMember<"size_t", "GroupSizeY", "Size of a work group on the Y dimension.">,
+        StructMember<"size_t", "GroupSizeZ", "Size of a work group on the Z dimension.">
+    ];
+}
+
+def : Function {
+    let name = "olEnqueueKernelLaunch";
+    let desc = "Enqueue a kernel launch with the specified size and parameters.";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+        Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
+        Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN>,
+        Param<"size_t", "ArgumentsSize", "size of the kernel argument struct", PARAM_IN>,
+        Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>,
+        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+    ];
+    let returns = [];
+}
diff --git a/offload/liboffload/API/Event.td b/offload/liboffload/API/Event.td
new file mode 100644
index 0000000000000..066704efba5b6
--- /dev/null
+++ b/offload/liboffload/API/Event.td
@@ -0,0 +1,41 @@
+//===-- Event.td - Event definitions for Offload -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to the event handle
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+    let name = "olRetainEvent";
+    let desc = "Increment the event's reference count.";
+    let details = [];
+    let params = [
+        Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olReleaseEvent";
+    let desc = "Decrement the event's reference count, and free it if the reference count reaches 0.";
+    let details = [];
+    let params = [
+        Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olWaitEvent";
+    let desc = "Wait for the event to be complete.";
+    let details = [];
+    let params = [
+        Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
+    ];
+    let returns = [];
+}
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
new file mode 100644
index 0000000000000..3620e02c3b7bf
--- /dev/null
+++ b/offload/liboffload/API/Kernel.td
@@ -0,0 +1,45 @@
+//===-- Kernel.td - Kernel definitions for Offload ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to the kernel handle
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+    let name = "olCreateKernel";
+    let desc = "Create a kernel from the function identified by `KernelName` in the given program.";
+    let details = [
+        "The created kernel has an initial reference count of 1."
+    ];
+    let params = [
+        Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>,
+        Param<"const char*", "KernelName", "name of the kernel entry point in the program", PARAM_IN>,
+        Param<"ol_kernel_handle_t*", "Kernel", "output pointer for the created kernel", PARAM_OUT>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olRetainKernel";
+    let desc = "Increment the kernel's reference count.";
+    let details = [];
+    let params = [
+        Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olReleaseKernel";
+    let desc = "Decrement the kernel's reference count, and free it if the reference count reaches 0.";
+    let details = [];
+    let params = [
+        Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
+    ];
+    let returns = [];
+}
diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td
new file mode 100644
index 0000000000000..ca0200864d6f0
--- /dev/null
+++ b/offload/liboffload/API/Memory.td
@@ -0,0 +1,48 @@
+//===-- Memory.td - Memory definitions for Offload ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to memory allocations
+//
+//===----------------------------------------------------------------------===//
+
+def : Enum {
+  let name = "ol_alloc_type_t";
+  let desc = "Represents the type of allocation made with olMemAlloc.";
+  let etors = [
+    Etor<"HOST", "Host allocation">,
+    Etor<"DEVICE", "Device allocation">,
+    Etor<"SHARED", "Shared allocation">
+  ];
+}
+
+def : Function {
+  let name = "olMemAlloc";
+  let desc = "Creates a memory allocation on the specified device.";
+  let params = [
+    Param<"ol_device_handle_t", "Device", "handle of the device to allocate on", PARAM_IN>,
+    Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>,
+    Param<"size_t", "Size", "size of the allocation in bytes", PARAM_IN>,
+    Param<"void**", "AllocationOut", "output for the allocated pointer", PARAM_OUT>
+  ];
+  let returns = [
+    Return<"OL_ERRC_INVALID_SIZE", [
+      "`Size == 0`"
+    ]>
+  ];
+}
+
+def : Function {
+  let name = "olMemFree";
+  let desc = "Frees a memory allocation previously made by olMemAlloc.";
+  let params = [
+    Param<"ol_device_handle_t", "Device", "handle of the device the allocation belongs to", PARAM_IN>,
+    Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>,
+    Param<"void*", "Address", "address of the allocation to free", PARAM_IN>,
+  ];
+  let returns = [];
+}
diff --git a/offload/liboffload/API/OffloadAPI.td b/offload/liboffload/API/OffloadAPI.td
index 8a0c3c4058122..f2822b93e6bf8 100644
--- a/offload/liboffload/API/OffloadAPI.td
+++ b/offload/liboffload/API/OffloadAPI.td
@@ -13,3 +13,9 @@ include "APIDefs.td"
 include "Common.td"
 include "Platform.td"
 include "Device.td"
+include "Memory.td"
+include "Queue.td"
+include "Event.td"
+include "Enqueue.td"
+include "Program.td"
+include "Kernel.td"
diff --git a/offload/liboffload/API/Platform.td b/offload/liboffload/API/Platform.td
index 03e70cf96ac94..f2a09fa41a338 100644
--- a/offload/liboffload/API/Platform.td
+++ b/offload/liboffload/API/Platform.td
@@ -11,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 def : Function {
   let name = "olGetPlatform";
-  let desc = "Retrieves all available platforms";
+  let desc = "Retrieves all available platforms.";
   let details = [
     "Multiple calls to this function will return identical platforms handles, in the same order.",
   ];
@@ -35,7 +35,7 @@ def : Function {
 
 def : Function {
   let name = "olGetPlatformCount";
-  let desc = "Retrieves the number of available platforms";
+  let desc = "Retrieves the number of available platforms.";
   let params = [
     Param<"uint32_t*",
       "NumPlatforms", "returns the total number of platforms available.",
@@ -46,7 +46,7 @@ def : Function {
 
 def : Enum {
   let name = "ol_platform_info_t";
-  let desc = "Supported platform info";
+  let desc = "Supported platform info.";
   let is_typed = 1;
   let etors = [
     TaggedEtor<"NAME", "char[]", "The string denoting name of the platform. The size of the info needs to be dynamically queried.">,
@@ -58,7 +58,7 @@ def : Enum {
 
 def : Enum {
   let name = "ol_platform_backend_t";
-  let desc = "Identifies the native backend of the platform";
+  let desc = "Identifies the native backend of the platform.";
   let etors =[
     Etor<"UNKNOWN", "The backend is not recognized">,
     Etor<"CUDA", "The backend is CUDA">,
@@ -68,7 +68,7 @@ def : Enum {
 
 def : Function {
   let name = "olGetPlatformInfo";
-  let desc = "Queries the given property of the platform";
+  let desc = "Queries the given property of the platform.";
   let details = [
     "`olGetPlatformInfoSize` can be used to query the storage size "
     "required for the given query."
@@ -96,7 +96,7 @@ def : Function {
 
 def : Function {
   let name = "olGetPlatformInfoSize";
-  let desc = "Returns the storage size of the given platform query";
+  let desc = "Returns the storage size of the given platform query.";
   let details = [];
   let params = [
     Param<"ol_platform_handle_t", "Platform", "handle of the platform", PARAM_IN>,
diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
new file mode 100644
index 0000000000000..df644934f8b2f
--- /dev/null
+++ b/offload/liboffload/API/Program.td
@@ -0,0 +1,46 @@
+//===-- Program.td - Program definitions for Offload -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to the program handle
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+    let name = "olCreateProgram";
+    let desc = "Create a program for the device from the binary image pointed to by `ProgData`.";
+    let details = [
+        "The created program has an initial reference count of 1."
+    ];
+    let params = [
+        Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
+        Param<"const void*", "ProgData", "pointer to the program binary data", PARAM_IN>,
+        Param<"size_t", "ProgDataSize", "size of the program binary in bytes", PARAM_IN>,
+        Param<"ol_program_handle_t*", "Program", "output pointer for the created program", PARAM_OUT>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olRetainProgram";
+    let desc = "Increment the program's reference count.";
+    let details = [];
+    let params = [
+        Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olReleaseProgram";
+    let desc = "Decrement the program's reference count, and free it if the reference count reaches 0.";
+    let details = [];
+    let params = [
+        Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>
+    ];
+    let returns = [];
+}
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
new file mode 100644
index 0000000000000..6725d6737ce2a
--- /dev/null
+++ b/offload/liboffload/API/Queue.td
@@ -0,0 +1,54 @@
+//===-- Queue.td - Queue definitions for Offload -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to the queue handle
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+    let name = "olCreateQueue";
+    let desc = "Create a queue for the given device.";
+    let details = [
+        "The created queue has an initial reference count of 1."
+    ];
+    let params = [
+        Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
+        Param<"ol_queue_handle_t*", "Queue", "output pointer for the created queue", PARAM_OUT>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olRetainQueue";
+    let desc = "Increment the queue's reference count.";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olReleaseQueue";
+    let desc = "Decrement the queue's reference count, and free it if the reference count reaches 0.";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olWaitQueue";
+    let desc = "Wait for the enqueued work on a queue to complete.";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+    ];
+    let returns = [];
+}
diff --git a/offload/liboffload/API/README.md b/offload/liboffload/API/README.md
index b59ac2782a2be..fda1ad39fa937 100644
--- a/offload/liboffload/API/README.md
+++ b/offload/liboffload/API/README.md
@@ -138,8 +138,8 @@ allow more backends to be easily added in future.
 
 A new object can be added to the API by adding to one of the existing `.td`
 files. It is also possible to add a new tablegen file to the API by adding it
-to the includes in `OffloadAPI.td`. When the offload target is rebuilt, the
-new definition will be included in the generated files.
+to the includes in `OffloadAPI.td`. When the `OffloadGenerate` target is
+rebuilt, the new definition will be included in the generated files.
 
 ### Adding a new entry point
 
@@ -147,4 +147,4 @@ When a new entry point is added (e.g. `offloadDeviceFoo`), the actual entry
 point is automatically generated, which contains validation and tracing code.
 It expects an implementation function (`offloadDeviceFoo_impl`) to be defined,
 which it will call into. The definition of this implementation function should
-be added to `src/offload_impl.cpp`
+be added to `src/OffloadImpl.cpp`
diff --git a/offload/liboffload/include/OffloadImpl.hpp b/offload/liboffload/include/OffloadImpl.hpp
index 6d745095f3105..ec470a355309a 100644
--- a/offload/liboffload/include/OffloadImpl.hpp
+++ b/offload/liboffload/include/OffloadImpl.hpp
@@ -22,6 +22,7 @@
 
 struct OffloadConfig {
   bool TracingEnabled = false;
+  bool ValidationEnabled = true;
 };
 
 OffloadConfig &offloadConfig();
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 11fcc96625ab8..dc291907570f1 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -75,15 +75,31 @@ extern "C" {
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Handle of a platform instance
-typedef struct ol_platform_handle_t_ *ol_platform_handle_t;
+typedef struct ol_platform_impl_t *ol_platform_handle_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Handle of platform's device object
-typedef struct ol_device_handle_t_ *ol_device_handle_t;
+typedef struct ol_device_impl_t *ol_device_handle_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Handle of context object
-typedef struct ol_context_handle_t_ *ol_context_handle_t;
+typedef struct ol_context_impl_t *ol_context_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of queue object
+typedef struct ol_queue_impl_t *ol_queue_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of event object
+typedef struct ol_event_impl_t *ol_event_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of program object
+typedef struct ol_program_impl_t *ol_program_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of kernel object
+typedef struct ol_kernel_impl_t *ol_kernel_handle_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Defines Return/Error codes
@@ -94,34 +110,32 @@ typedef enum ol_errc_t {
   OL_ERRC_INVALID_VALUE = 1,
   /// Invalid platform
   OL_ERRC_INVALID_PLATFORM = 2,
-  /// Device not found
-  OL_ERRC_DEVICE_NOT_FOUND = 3,
   /// Invalid device
-  OL_ERRC_INVALID_DEVICE = 4,
-  /// Device hung, reset, was removed, or driver update occurred
-  OL_ERRC_DEVICE_LOST = 5,
-  /// plugin is not initialized or specific entry-point is not implemented
-  OL_ERRC_UNINITIALIZED = 6,
+  OL_ERRC_INVALID_DEVICE = 3,
+  /// Invalid queue
+  OL_ERRC_INVALID_QUEUE = 4,
+  /// Invalid event
+  OL_ERRC_INVALID_EVENT = 5,
+  /// Named kernel not found in the program binary
+  OL_ERRC_INVALID_KERNEL_NAME = 6,
   /// Out of resources
   OL_ERRC_OUT_OF_RESOURCES = 7,
-  /// generic error code for unsupported versions
-  OL_ERRC_UNSUPPORTED_VERSION = 8,
   /// generic error code for unsupported features
-  OL_ERRC_UNSUPPORTED_FEATURE = 9,
+  OL_ERRC_UNSUPPORTED_FEATURE = 8,
   /// generic error code for invalid arguments
-  OL_ERRC_INVALID_ARGUMENT = 10,
+  OL_ERRC_INVALID_ARGUMENT = 9,
   /// handle argument is not valid
-  OL_ERRC_INVALID_NULL_HANDLE = 11,
+  OL_ERRC_INVALID_NULL_HANDLE = 10,
   /// pointer argument may not be nullptr
-  OL_ERRC_INVALID_NULL_POINTER = 12,
+  OL_ERRC_INVALID_NULL_POINTER = 11,
   /// invalid size or dimensions (e.g., must not be zero, or is out of bounds)
-  OL_ERRC_INVALID_SIZE = 13,
+  OL_ERRC_INVALID_SIZE = 12,
   /// enumerator argument is not valid
-  OL_ERRC_INVALID_ENUMERATION = 14,
+  OL_ERRC_INVALID_ENUMERATION = 13,
   /// enumerator argument is not supported by the device
-  OL_ERRC_UNSUPPORTED_ENUMERATION = 15,
+  OL_ERRC_UNSUPPORTED_ENUMERATION = 14,
   /// Unknown or internal error
-  OL_ERRC_UNKNOWN = 16,
+  OL_ERRC_UNKNOWN = 15,
   /// @cond
   OL_ERRC_FORCE_UINT32 = 0x7fffffff
   /// @endcond
@@ -188,7 +202,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olInit();
 OL_APIEXPORT ol_result_t OL_APICALL olShutDown();
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Retrieves all available platforms
+/// @brief Retrieves all available platforms.
 ///
 /// @details
 ///    - Multiple calls to this function will return identical platforms
@@ -213,7 +227,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetPlatform(
     ol_platform_handle_t *Platforms);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Retrieves the number of available platforms
+/// @brief Retrieves the number of available platforms.
 ///
 /// @details
 ///
@@ -229,7 +243,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCount(
     uint32_t *NumPlatforms);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Supported platform info
+/// @brief Supported platform info.
 typedef enum ol_platform_info_t {
   /// [char[]] The string denoting name of the platform. The size of the info
   /// needs to be dynamically queried.
@@ -249,7 +263,7 @@ typedef enum ol_platform_info_t {
 } ol_platform_info_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Identifies the native backend of the platform
+/// @brief Identifies the native backend of the platform.
 typedef enum ol_platform_backend_t {
   /// The backend is not recognized
   OL_PLATFORM_BACKEND_UNKNOWN = 0,
@@ -264,7 +278,7 @@ typedef enum ol_platform_backend_t {
 } ol_platform_backend_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Queries the given property of the platform
+/// @brief Queries the given property of the platform.
 ///
 /// @details
 ///    - `olGetPlatformInfoSize` can be used to query the storage size required
@@ -298,7 +312,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformInfo(
     void *PropValue);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Returns the storage size of the given platform query
+/// @brief Returns the storage size of the given platform query.
 ///
 /// @details
 ///
@@ -322,7 +336,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformInfoSize(
     size_t *PropSizeRet);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Supported device types
+/// @brief Supported device types.
 typedef enum ol_device_type_t {
   /// The default device type as preferred by the runtime
   OL_DEVICE_TYPE_DEFAULT = 0,
@@ -339,7 +353,7 @@ typedef enum ol_device_type_t {
 } ol_device_type_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Supported device info
+/// @brief Supported device info.
 typedef enum ol_device_info_t {
   /// [ol_device_type_t] type of the device
   OL_DEVICE_INFO_TYPE = 0,
@@ -358,7 +372,7 @@ typedef enum ol_device_info_t {
 } ol_device_info_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Retrieves the number of available devices within a platform
+/// @brief Retrieves the number of available devices within a platform.
 ///
 /// @details
 ///
@@ -377,7 +391,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceCount(
     uint32_t *NumDevices);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Retrieves devices within a platform
+/// @brief Retrieves devices within a platform.
 ///
 /// @details
 ///    - Multiple calls to this function will return identical device handles,
@@ -405,7 +419,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDevice(
     ol_device_handle_t *Devices);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Queries the given property of the device
+/// @brief Queries the given property of the device.
 ///
 /// @details
 ///
@@ -437,7 +451,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfo(
     void *PropValue);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Returns the storage size of the given device query
+/// @brief Returns the storage size of the given device query.
 ///
 /// @details
 ///
@@ -460,6 +474,396 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize(
     // [out] pointer to the number of bytes required to store the query
     size_t *PropSizeRet);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Return the special host device used to represent the host in memory
+/// transfer operations.
+///
+/// @details
+///    - The host device does not support queues
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == Device`
+OL_APIEXPORT ol_result_t OL_APICALL olGetHostDevice(
+    // [out] output pointer for the device
+    ol_device_handle_t *Device);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Represents the type of allocation made with olMemAlloc.
+typedef enum ol_alloc_type_t {
+  /// Host allocation
+  OL_ALLOC_TYPE_HOST = 0,
+  /// Device allocation
+  OL_ALLOC_TYPE_DEVICE = 1,
+  /// Shared allocation
+  OL_ALLOC_TYPE_SHARED = 2,
+  /// @cond
+  OL_ALLOC_TYPE_FORCE_UINT32 = 0x7fffffff
+  /// @endcond
+
+} ol_alloc_type_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Creates a memory allocation on the specified device.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_SIZE
+///         + `Size == 0`
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Device`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == AllocationOut`
+OL_APIEXPORT ol_result_t OL_APICALL olMemAlloc(
+    // [in] handle of the device to allocate on
+    ol_device_handle_t Device,
+    // [in] type of the allocation
+    ol_alloc_type_t Type,
+    // [in] size of the allocation in bytes
+    size_t Size,
+    // [out] output for the allocated pointer
+    void **AllocationOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Frees a memory allocation previously made by olMemAlloc.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Device`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == Address`
+OL_APIEXPORT ol_result_t OL_APICALL olMemFree(
+    // [in] handle of the device the allocation was made on
+    ol_device_handle_t Device,
+    // [in] type of the allocation
+    ol_alloc_type_t Type,
+    // [in] address of the allocation to free
+    void *Address);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Create a queue for the given device.
+///
+/// @details
+///    - The created queue has an initial reference count of 1.
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Device`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == Queue`
+OL_APIEXPORT ol_result_t OL_APICALL olCreateQueue(
+    // [in] handle of the device
+    ol_device_handle_t Device,
+    // [out] output pointer for the created queue
+    ol_queue_handle_t *Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the queue's reference count.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olRetainQueue(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the queue's reference count, and free it if the reference
+/// count reaches 0.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueue(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Wait for the enqueued work on a queue to complete.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olWaitQueue(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the event's reference count.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Event`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(
+    // [in] handle of the event
+    ol_event_handle_t Event);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the event's reference count, and free it if the reference
+/// count reaches 0.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Event`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseEvent(
+    // [in] handle of the event
+    ol_event_handle_t Event);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Wait for the event to complete.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Event`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(
+    // [in] handle of the event
+    ol_event_handle_t Event);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a memcpy operation.
+///
+/// @details
+///    - For host pointers, use the device returned by olGetHostDevice
+///    - At least one device must be a non-host device
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_SIZE
+///         + `Size == 0`
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///         + `NULL == DstDevice`
+///         + `NULL == SrcDevice`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == DstPtr`
+///         + `NULL == SrcPtr`
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpy(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue,
+    // [in] pointer to copy to
+    void *DstPtr,
+    // [in] device that DstPtr belongs to
+    ol_device_handle_t DstDevice,
+    // [in] pointer to copy from
+    void *SrcPtr,
+    // [in] device that SrcPtr belongs to
+    ol_device_handle_t SrcDevice,
+    // [in] size in bytes of data to copy
+    size_t Size,
+    // [out][optional] optional recorded event for the enqueued operation
+    ol_event_handle_t *EventOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Size-related arguments for a kernel launch.
+typedef struct ol_kernel_launch_size_args_t {
+  size_t Dimensions; ///< Number of work dimensions
+  size_t NumGroupsX; ///< Number of work groups on the X dimension
+  size_t NumGroupsY; ///< Number of work groups on the Y dimension
+  size_t NumGroupsZ; ///< Number of work groups on the Z dimension
+  size_t GroupSizeX; ///< Size of a work group on the X dimension
+  size_t GroupSizeY; ///< Size of a work group on the Y dimension
+  size_t GroupSizeZ; ///< Size of a work group on the Z dimension
+} ol_kernel_launch_size_args_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a kernel launch with the specified size and parameters.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///         + `NULL == Kernel`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == ArgumentsData`
+///         + `NULL == LaunchSizeArgs`
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue,
+    // [in] handle of the kernel
+    ol_kernel_handle_t Kernel,
+    // [in] pointer to the kernel argument struct
+    const void *ArgumentsData,
+    // [in] size of the kernel argument struct
+    size_t ArgumentsSize,
+    // [in] pointer to the struct containing launch size parameters
+    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+    // [out][optional] optional recorded event for the enqueued operation
+    ol_event_handle_t *EventOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Create a program for the device from the binary image pointed to by
+/// `ProgData`.
+///
+/// @details
+///    - The created program has an initial reference count of 1.
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Device`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == ProgData`
+///         + `NULL == Program`
+OL_APIEXPORT ol_result_t OL_APICALL olCreateProgram(
+    // [in] handle of the device
+    ol_device_handle_t Device,
+    // [in] pointer to the program binary data
+    const void *ProgData,
+    // [in] size of the program binary in bytes
+    size_t ProgDataSize,
+    // [out] output pointer for the created program
+    ol_program_handle_t *Program);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the program's reference count.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Program`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olRetainProgram(
+    // [in] handle of the program
+    ol_program_handle_t Program);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the program's reference count, and free it if the reference
+/// count reaches 0.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Program`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseProgram(
+    // [in] handle of the program
+    ol_program_handle_t Program);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Create a kernel from the function identified by `KernelName` in the
+/// given program.
+///
+/// @details
+///    - The created kernel has an initial reference count of 1.
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Program`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == KernelName`
+///         + `NULL == Kernel`
+OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(
+    // [in] handle of the program
+    ol_program_handle_t Program,
+    // [in] name of the kernel entry point in the program
+    const char *KernelName,
+    // [out] output pointer for the created kernel
+    ol_kernel_handle_t *Kernel);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the kernel's reference count.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Kernel`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(
+    // [in] handle of the kernel
+    ol_kernel_handle_t Kernel);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the kernel's reference count, and free it if the reference
+/// count reaches 0.
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Kernel`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernel(
+    // [in] handle of the kernel
+    ol_kernel_handle_t Kernel);
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olGetPlatform
 /// @details Each entry is a pointer to the parameter passed to the function;
@@ -530,6 +934,154 @@ typedef struct ol_get_device_info_size_params_t {
   size_t **pPropSizeRet;
 } ol_get_device_info_size_params_t;
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olGetHostDevice
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_get_host_device_params_t {
+  ol_device_handle_t **pDevice;
+} ol_get_host_device_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olMemAlloc
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_mem_alloc_params_t {
+  ol_device_handle_t *pDevice;
+  ol_alloc_type_t *pType;
+  size_t *pSize;
+  void ***pAllocationOut;
+} ol_mem_alloc_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olMemFree
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_mem_free_params_t {
+  ol_device_handle_t *pDevice;
+  ol_alloc_type_t *pType;
+  void **pAddress;
+} ol_mem_free_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olCreateQueue
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_create_queue_params_t {
+  ol_device_handle_t *pDevice;
+  ol_queue_handle_t **pQueue;
+} ol_create_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainQueue
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_retain_queue_params_t {
+  ol_queue_handle_t *pQueue;
+} ol_retain_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseQueue
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_release_queue_params_t {
+  ol_queue_handle_t *pQueue;
+} ol_release_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olWaitQueue
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_wait_queue_params_t {
+  ol_queue_handle_t *pQueue;
+} ol_wait_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainEvent
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_retain_event_params_t {
+  ol_event_handle_t *pEvent;
+} ol_retain_event_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseEvent
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_release_event_params_t {
+  ol_event_handle_t *pEvent;
+} ol_release_event_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olWaitEvent
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_wait_event_params_t {
+  ol_event_handle_t *pEvent;
+} ol_wait_event_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olEnqueueMemcpy
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_enqueue_memcpy_params_t {
+  ol_queue_handle_t *pQueue;
+  void **pDstPtr;
+  ol_device_handle_t *pDstDevice;
+  void **pSrcPtr;
+  ol_device_handle_t *pSrcDevice;
+  size_t *pSize;
+  ol_event_handle_t **pEventOut;
+} ol_enqueue_memcpy_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olEnqueueKernelLaunch
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_enqueue_kernel_launch_params_t {
+  ol_queue_handle_t *pQueue;
+  ol_kernel_handle_t *pKernel;
+  const void **pArgumentsData;
+  size_t *pArgumentsSize;
+  const ol_kernel_launch_size_args_t **pLaunchSizeArgs;
+  ol_event_handle_t **pEventOut;
+} ol_enqueue_kernel_launch_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olCreateProgram
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_create_program_params_t {
+  ol_device_handle_t *pDevice;
+  const void **pProgData;
+  size_t *pProgDataSize;
+  ol_program_handle_t **pProgram;
+} ol_create_program_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainProgram
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_retain_program_params_t {
+  ol_program_handle_t *pProgram;
+} ol_retain_program_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseProgram
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_release_program_params_t {
+  ol_program_handle_t *pProgram;
+} ol_release_program_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olCreateKernel
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_create_kernel_params_t {
+  ol_program_handle_t *pProgram;
+  const char **pKernelName;
+  ol_kernel_handle_t **pKernel;
+} ol_create_kernel_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainKernel
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_retain_kernel_params_t {
+  ol_kernel_handle_t *pKernel;
+} ol_retain_kernel_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseKernel
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_release_kernel_params_t {
+  ol_kernel_handle_t *pKernel;
+} ol_release_kernel_params_t;
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Variant of olInit that also sets source code location information
 /// @details See also ::olInit
@@ -605,6 +1157,140 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSizeWithCodeLoc(
     ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet,
     ol_code_location_t *CodeLocation);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olGetHostDevice that also sets source code location
+/// information
+/// @details See also ::olGetHostDevice
+OL_APIEXPORT ol_result_t OL_APICALL olGetHostDeviceWithCodeLoc(
+    ol_device_handle_t *Device, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olMemAlloc that also sets source code location information
+/// @details See also ::olMemAlloc
+OL_APIEXPORT ol_result_t OL_APICALL olMemAllocWithCodeLoc(
+    ol_device_handle_t Device, ol_alloc_type_t Type, size_t Size,
+    void **AllocationOut, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olMemFree that also sets source code location information
+/// @details See also ::olMemFree
+OL_APIEXPORT ol_result_t OL_APICALL
+olMemFreeWithCodeLoc(ol_device_handle_t Device, ol_alloc_type_t Type,
+                     void *Address, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olCreateQueue that also sets source code location
+/// information
+/// @details See also ::olCreateQueue
+OL_APIEXPORT ol_result_t OL_APICALL
+olCreateQueueWithCodeLoc(ol_device_handle_t Device, ol_queue_handle_t *Queue,
+                         ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainQueue that also sets source code location
+/// information
+/// @details See also ::olRetainQueue
+OL_APIEXPORT ol_result_t OL_APICALL olRetainQueueWithCodeLoc(
+    ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseQueue that also sets source code location
+/// information
+/// @details See also ::olReleaseQueue
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueueWithCodeLoc(
+    ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olWaitQueue that also sets source code location
+/// information
+/// @details See also ::olWaitQueue
+OL_APIEXPORT ol_result_t OL_APICALL olWaitQueueWithCodeLoc(
+    ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainEvent that also sets source code location
+/// information
+/// @details See also ::olRetainEvent
+OL_APIEXPORT ol_result_t OL_APICALL olRetainEventWithCodeLoc(
+    ol_event_handle_t Event, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseEvent that also sets source code location
+/// information
+/// @details See also ::olReleaseEvent
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseEventWithCodeLoc(
+    ol_event_handle_t Event, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olWaitEvent that also sets source code location
+/// information
+/// @details See also ::olWaitEvent
+OL_APIEXPORT ol_result_t OL_APICALL olWaitEventWithCodeLoc(
+    ol_event_handle_t Event, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olEnqueueMemcpy that also sets source code location
+/// information
+/// @details See also ::olEnqueueMemcpy
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpyWithCodeLoc(
+    ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice,
+    void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size,
+    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olEnqueueKernelLaunch that also sets source code location
+/// information
+/// @details See also ::olEnqueueKernelLaunch
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunchWithCodeLoc(
+    ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+    const void *ArgumentsData, size_t ArgumentsSize,
+    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olCreateProgram that also sets source code location
+/// information
+/// @details See also ::olCreateProgram
+OL_APIEXPORT ol_result_t OL_APICALL olCreateProgramWithCodeLoc(
+    ol_device_handle_t Device, const void *ProgData, size_t ProgDataSize,
+    ol_program_handle_t *Program, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainProgram that also sets source code location
+/// information
+/// @details See also ::olRetainProgram
+OL_APIEXPORT ol_result_t OL_APICALL olRetainProgramWithCodeLoc(
+    ol_program_handle_t Program, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseProgram that also sets source code location
+/// information
+/// @details See also ::olReleaseProgram
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseProgramWithCodeLoc(
+    ol_program_handle_t Program, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olCreateKernel that also sets source code location
+/// information
+/// @details See also ::olCreateKernel
+OL_APIEXPORT ol_result_t OL_APICALL olCreateKernelWithCodeLoc(
+    ol_program_handle_t Program, const char *KernelName,
+    ol_kernel_handle_t *Kernel, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainKernel that also sets source code location
+/// information
+/// @details See also ::olRetainKernel
+OL_APIEXPORT ol_result_t OL_APICALL olRetainKernelWithCodeLoc(
+    ol_kernel_handle_t Kernel, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseKernel that also sets source code location
+/// information
+/// @details See also ::olReleaseKernel
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernelWithCodeLoc(
+    ol_kernel_handle_t Kernel, ol_code_location_t *CodeLocation);
+
 #if defined(__cplusplus)
 } // extern "C"
 #endif
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 49c1c8169615e..97cd2eb552c96 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -8,30 +8,30 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 ol_impl_result_t olInit_val() {
-  if (true /*enableParameterValidation*/) {
+  if (offloadConfig().ValidationEnabled) {
   }
 
-  return olInit_impl();
+  return llvm::offload::olInit_impl();
 }
 OL_APIEXPORT ol_result_t OL_APICALL olInit() {
   if (offloadConfig().TracingEnabled) {
-    std::cout << "---> olInit";
+    llvm::errs() << "---> olInit";
   }
 
   ol_result_t Result = olInit_val();
 
   if (offloadConfig().TracingEnabled) {
-    std::cout << "()";
-    std::cout << "-> " << Result << "\n";
+    llvm::errs() << "()";
+    llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
-      std::cout << "     *Error Details* " << Result->Details << " \n";
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
     }
   }
   return Result;
 }
 ol_result_t olInitWithCodeLoc(ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = olInit();
+  ol_result_t Result = ::olInit();
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -39,30 +39,30 @@ ol_result_t olInitWithCodeLoc(ol_code_location_t *CodeLocation) {
 
 ///////////////////////////////////////////////////////////////////////////////
 ol_impl_result_t olShutDown_val() {
-  if (true /*enableParameterValidation*/) {
+  if (offloadConfig().ValidationEnabled) {
   }
 
-  return olShutDown_impl();
+  return llvm::offload::olShutDown_impl();
 }
 OL_APIEXPORT ol_result_t OL_APICALL olShutDown() {
   if (offloadConfig().TracingEnabled) {
-    std::cout << "---> olShutDown";
+    llvm::errs() << "---> olShutDown";
   }
 
   ol_result_t Result = olShutDown_val();
 
   if (offloadConfig().TracingEnabled) {
-    std::cout << "()";
-    std::cout << "-> " << Result << "\n";
+    llvm::errs() << "()";
+    llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
-      std::cout << "     *Error Details* " << Result->Details << " \n";
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
     }
   }
   return Result;
 }
 ol_result_t olShutDownWithCodeLoc(ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = olShutDown();
+  ol_result_t Result = ::olShutDown();
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -71,7 +71,7 @@ ol_result_t olShutDownWithCodeLoc(ol_code_location_t *CodeLocation) {
 ///////////////////////////////////////////////////////////////////////////////
 ol_impl_result_t olGetPlatform_val(uint32_t NumEntries,
                                    ol_platform_handle_t *Platforms) {
-  if (true /*enableParameterValidation*/) {
+  if (offloadConfig().ValidationEnabled) {
     if (NumEntries == 0) {
       return OL_ERRC_INVALID_SIZE;
     }
@@ -81,22 +81,22 @@ ol_impl_result_t olGetPlatform_val(uint32_t NumEntries,
     }
   }
 
-  return olGetPlatform_impl(NumEntries, Platforms);
+  return llvm::offload::olGetPlatform_impl(NumEntries, Platforms);
 }
 OL_APIEXPORT ol_result_t OL_APICALL
 olGetPlatform(uint32_t NumEntries, ol_platform_handle_t *Platforms) {
   if (offloadConfig().TracingEnabled) {
-    std::cout << "---> olGetPlatform";
+    llvm::errs() << "---> olGetPlatform";
   }
 
   ol_result_t Result = olGetPlatform_val(NumEntries, Platforms);
 
   if (offloadConfig().TracingEnabled) {
     ol_get_platform_params_t Params = {&NumEntries, &Platforms};
-    std::cout << "(" << &Params << ")";
-    std::cout << "-> " << Result << "\n";
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
-      std::cout << "     *Error Details* " << Result->Details << " \n";
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
     }
   }
   return Result;
@@ -105,7 +105,7 @@ ol_result_t olGetPlatformWithCodeLoc(uint32_t NumEntries,
                                      ol_platform_handle_t *Platforms,
                                      ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = olGetPlatform(NumEntries, Platforms);
+  ol_result_t Result = ::olGetPlatform(NumEntries, Platforms);
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -113,27 +113,27 @@ ol_result_t olGetPlatformWithCodeLoc(uint32_t NumEntries,
 
 ///////////////////////////////////////////////////////////////////////////////
 ol_impl_result_t olGetPlatformCount_val(uint32_t *NumPlatforms) {
-  if (true /*enableParameterValidation*/) {
+  if (offloadConfig().ValidationEnabled) {
     if (NULL == NumPlatforms) {
       return OL_ERRC_INVALID_NULL_POINTER;
     }
   }
 
-  return olGetPlatformCount_impl(NumPlatforms);
+  return llvm::offload::olGetPlatformCount_impl(NumPlatforms);
 }
 OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCount(uint32_t *NumPlatforms) {
   if (offloadConfig().TracingEnabled) {
-    std::cout << "---> olGetPlatformCount";
+    llvm::errs() << "---> olGetPlatformCount";
   }
 
   ol_result_t Result = olGetPlatformCount_val(NumPlatforms);
 
   if (offloadConfig().TracingEnabled) {
     ol_get_platform_count_params_t Params = {&NumPlatforms};
-    std::cout << "(" << &Params << ")";
-    std::cout << "-> " << Result << "\n";
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
-      std::cout << "     *Error Details* " << Result->Details << " \n";
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
     }
   }
   return Result;
@@ -141,7 +141,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCount(uint32_t *NumPlatforms) {
 ol_result_t olGetPlatformCountWithCodeLoc(uint32_t *NumPlatforms,
                                           ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = olGetPlatformCount(NumPlatforms);
+  ol_result_t Result = ::olGetPlatformCount(NumPlatforms);
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -151,7 +151,7 @@ ol_result_t olGetPlatformCountWithCodeLoc(uint32_t *NumPlatforms,
 ol_impl_result_t olGetPlatformInfo_val(ol_platform_handle_t Platform,
                                        ol_platform_info_t PropName,
                                        size_t PropSize, void *PropValue) {
-  if (true /*enableParameterValidation*/) {
+  if (offloadConfig().ValidationEnabled) {
     if (PropSize == 0) {
       return OL_ERRC_INVALID_SIZE;
     }
@@ -165,13 +165,14 @@ ol_impl_result_t olGetPlatformInfo_val(ol_platform_handle_t Platform,
     }
   }
 
-  return olGetPlatformInfo_impl(Platform, PropName, PropSize, PropValue);
+  return llvm::offload::olGetPlatformInfo_impl(Platform, PropName, PropSize,
+                                               PropValue);
 }
 OL_APIEXPORT ol_result_t OL_APICALL
 olGetPlatformInfo(ol_platform_handle_t Platform, ol_platform_info_t PropName,
                   size_t PropSize, void *PropValue) {
   if (offloadConfig().TracingEnabled) {
-    std::cout << "---> olGetPlatformInfo";
+    llvm::errs() << "---> olGetPlatformInfo";
   }
 
   ol_result_t Result =
@@ -180,10 +181,10 @@ olGetPlatformInfo(ol_platform_handle_t Platform, ol_platform_info_t PropName,
   if (offloadConfig().TracingEnabled) {
     ol_get_platform_info_params_t Params = {&Platform, &PropName, &PropSize,
                                             &PropValue};
-    std::cout << "(" << &Params << ")";
-    std::cout << "-> " << Result << "\n";
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
-      std::cout << "     *Error Details* " << Result->Details << " \n";
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
     }
   }
   return Result;
@@ -194,7 +195,7 @@ ol_result_t olGetPlatformInfoWithCodeLoc(ol_platform_handle_t Platform,
                                          ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
   ol_result_t Result =
-      olGetPlatformInfo(Platform, PropName, PropSize, PropValue);
+      ::olGetPlatformInfo(Platform, PropName, PropSize, PropValue);
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -204,7 +205,7 @@ ol_result_t olGetPlatformInfoWithCodeLoc(ol_platform_handle_t Platform,
 ol_impl_result_t olGetPlatformInfoSize_val(ol_platform_handle_t Platform,
                                            ol_platform_info_t PropName,
                                            size_t *PropSizeRet) {
-  if (true /*enableParameterValidation*/) {
+  if (offloadConfig().ValidationEnabled) {
     if (NULL == Platform) {
       return OL_ERRC_INVALID_NULL_HANDLE;
     }
@@ -214,13 +215,14 @@ ol_impl_result_t olGetPlatformInfoSize_val(ol_platform_handle_t Platform,
     }
   }
 
-  return olGetPlatformInfoSize_impl(Platform, PropName, PropSizeRet);
+  return llvm::offload::olGetPlatformInfoSize_impl(Platform, PropName,
+                                                   PropSizeRet);
 }
 OL_APIEXPORT ol_result_t OL_APICALL
 olGetPlatformInfoSize(ol_platform_handle_t Platform,
                       ol_platform_info_t PropName, size_t *PropSizeRet) {
   if (offloadConfig().TracingEnabled) {
-    std::cout << "---> olGetPlatformInfoSize";
+    llvm::errs() << "---> olGetPlatformInfoSize";
   }
 
   ol_result_t Result =
@@ -229,10 +231,10 @@ olGetPlatformInfoSize(ol_platform_handle_t Platform,
   if (offloadConfig().TracingEnabled) {
     ol_get_platform_info_size_params_t Params = {&Platform, &PropName,
                                                  &PropSizeRet};
-    std::cout << "(" << &Params << ")";
-    std::cout << "-> " << Result << "\n";
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
-      std::cout << "     *Error Details* " << Result->Details << " \n";
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
     }
   }
   return Result;
@@ -242,7 +244,7 @@ ol_result_t olGetPlatformInfoSizeWithCodeLoc(ol_platform_handle_t Platform,
                                              size_t *PropSizeRet,
                                              ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = olGetPlatformInfoSize(Platform, PropName, PropSizeRet);
+  ol_result_t Result = ::olGetPlatformInfoSize(Platform, PropName, PropSizeRet);
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -251,7 +253,7 @@ ol_result_t olGetPlatformInfoSizeWithCodeLoc(ol_platform_handle_t Platform,
 ///////////////////////////////////////////////////////////////////////////////
 ol_impl_result_t olGetDeviceCount_val(ol_platform_handle_t Platform,
                                       uint32_t *NumDevices) {
-  if (true /*enableParameterValidation*/) {
+  if (offloadConfig().ValidationEnabled) {
     if (NULL == Platform) {
       return OL_ERRC_INVALID_NULL_HANDLE;
     }
@@ -261,22 +263,22 @@ ol_impl_result_t olGetDeviceCount_val(ol_platform_handle_t Platform,
     }
   }
 
-  return olGetDeviceCount_impl(Platform, NumDevices);
+  return llvm::offload::olGetDeviceCount_impl(Platform, NumDevices);
 }
 OL_APIEXPORT ol_result_t OL_APICALL
 olGetDeviceCount(ol_platform_handle_t Platform, uint32_t *NumDevices) {
   if (offloadConfig().TracingEnabled) {
-    std::cout << "---> olGetDeviceCount";
+    llvm::errs() << "---> olGetDeviceCount";
   }
 
   ol_result_t Result = olGetDeviceCount_val(Platform, NumDevices);
 
   if (offloadConfig().TracingEnabled) {
     ol_get_device_count_params_t Params = {&Platform, &NumDevices};
-    std::cout << "(" << &Params << ")";
-    std::cout << "-> " << Result << "\n";
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
-      std::cout << "     *Error Details* " << Result->Details << " \n";
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
     }
   }
   return Result;
@@ -285,7 +287,7 @@ ol_result_t olGetDeviceCountWithCodeLoc(ol_platform_handle_t Platform,
                                         uint32_t *NumDevices,
                                         ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = olGetDeviceCount(Platform, NumDevices);
+  ol_result_t Result = ::olGetDeviceCount(Platform, NumDevices);
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -295,7 +297,7 @@ ol_result_t olGetDeviceCountWithCodeLoc(ol_platform_handle_t Platform,
 ol_impl_result_t olGetDevice_val(ol_platform_handle_t Platform,
                                  uint32_t NumEntries,
                                  ol_device_handle_t *Devices) {
-  if (true /*enableParameterValidation*/) {
+  if (offloadConfig().ValidationEnabled) {
     if (NumEntries == 0) {
       return OL_ERRC_INVALID_SIZE;
     }
@@ -309,23 +311,23 @@ ol_impl_result_t olGetDevice_val(ol_platform_handle_t Platform,
     }
   }
 
-  return olGetDevice_impl(Platform, NumEntries, Devices);
+  return llvm::offload::olGetDevice_impl(Platform, NumEntries, Devices);
 }
 OL_APIEXPORT ol_result_t OL_APICALL olGetDevice(ol_platform_handle_t Platform,
                                                 uint32_t NumEntries,
                                                 ol_device_handle_t *Devices) {
   if (offloadConfig().TracingEnabled) {
-    std::cout << "---> olGetDevice";
+    llvm::errs() << "---> olGetDevice";
   }
 
   ol_result_t Result = olGetDevice_val(Platform, NumEntries, Devices);
 
   if (offloadConfig().TracingEnabled) {
     ol_get_device_params_t Params = {&Platform, &NumEntries, &Devices};
-    std::cout << "(" << &Params << ")";
-    std::cout << "-> " << Result << "\n";
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
-      std::cout << "     *Error Details* " << Result->Details << " \n";
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
     }
   }
   return Result;
@@ -335,7 +337,7 @@ ol_result_t olGetDeviceWithCodeLoc(ol_platform_handle_t Platform,
                                    ol_device_handle_t *Devices,
                                    ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = olGetDevice(Platform, NumEntries, Devices);
+  ol_result_t Result = ::olGetDevice(Platform, NumEntries, Devices);
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -345,7 +347,7 @@ ol_result_t olGetDeviceWithCodeLoc(ol_platform_handle_t Platform,
 ol_impl_result_t olGetDeviceInfo_val(ol_device_handle_t Device,
                                      ol_device_info_t PropName, size_t PropSize,
                                      void *PropValue) {
-  if (true /*enableParameterValidation*/) {
+  if (offloadConfig().ValidationEnabled) {
     if (PropSize == 0) {
       return OL_ERRC_INVALID_SIZE;
     }
@@ -359,14 +361,15 @@ ol_impl_result_t olGetDeviceInfo_val(ol_device_handle_t Device,
     }
   }
 
-  return olGetDeviceInfo_impl(Device, PropName, PropSize, PropValue);
+  return llvm::offload::olGetDeviceInfo_impl(Device, PropName, PropSize,
+                                             PropValue);
 }
 OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfo(ol_device_handle_t Device,
                                                     ol_device_info_t PropName,
                                                     size_t PropSize,
                                                     void *PropValue) {
   if (offloadConfig().TracingEnabled) {
-    std::cout << "---> olGetDeviceInfo";
+    llvm::errs() << "---> olGetDeviceInfo";
   }
 
   ol_result_t Result =
@@ -375,10 +378,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfo(ol_device_handle_t Device,
   if (offloadConfig().TracingEnabled) {
     ol_get_device_info_params_t Params = {&Device, &PropName, &PropSize,
                                           &PropValue};
-    std::cout << "(" << &Params << ")";
-    std::cout << "-> " << Result << "\n";
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
-      std::cout << "     *Error Details* " << Result->Details << " \n";
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
     }
   }
   return Result;
@@ -388,7 +391,7 @@ ol_result_t olGetDeviceInfoWithCodeLoc(ol_device_handle_t Device,
                                        size_t PropSize, void *PropValue,
                                        ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = olGetDeviceInfo(Device, PropName, PropSize, PropValue);
+  ol_result_t Result = ::olGetDeviceInfo(Device, PropName, PropSize, PropValue);
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -398,7 +401,7 @@ ol_result_t olGetDeviceInfoWithCodeLoc(ol_device_handle_t Device,
 ol_impl_result_t olGetDeviceInfoSize_val(ol_device_handle_t Device,
                                          ol_device_info_t PropName,
                                          size_t *PropSizeRet) {
-  if (true /*enableParameterValidation*/) {
+  if (offloadConfig().ValidationEnabled) {
     if (NULL == Device) {
       return OL_ERRC_INVALID_NULL_HANDLE;
     }
@@ -408,12 +411,12 @@ ol_impl_result_t olGetDeviceInfoSize_val(ol_device_handle_t Device,
     }
   }
 
-  return olGetDeviceInfoSize_impl(Device, PropName, PropSizeRet);
+  return llvm::offload::olGetDeviceInfoSize_impl(Device, PropName, PropSizeRet);
 }
 OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize(
     ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet) {
   if (offloadConfig().TracingEnabled) {
-    std::cout << "---> olGetDeviceInfoSize";
+    llvm::errs() << "---> olGetDeviceInfoSize";
   }
 
   ol_result_t Result = olGetDeviceInfoSize_val(Device, PropName, PropSizeRet);
@@ -421,10 +424,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize(
   if (offloadConfig().TracingEnabled) {
     ol_get_device_info_size_params_t Params = {&Device, &PropName,
                                                &PropSizeRet};
-    std::cout << "(" << &Params << ")";
-    std::cout << "-> " << Result << "\n";
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
-      std::cout << "     *Error Details* " << Result->Details << " \n";
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
     }
   }
   return Result;
@@ -434,7 +437,782 @@ ol_result_t olGetDeviceInfoSizeWithCodeLoc(ol_device_handle_t Device,
                                            size_t *PropSizeRet,
                                            ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = olGetDeviceInfoSize(Device, PropName, PropSizeRet);
+  ol_result_t Result = ::olGetDeviceInfoSize(Device, PropName, PropSizeRet);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olGetHostDevice_val(ol_device_handle_t *Device) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Device) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return llvm::offload::olGetHostDevice_impl(Device);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olGetHostDevice(ol_device_handle_t *Device) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olGetHostDevice";
+  }
+
+  ol_result_t Result = olGetHostDevice_val(Device);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_get_host_device_params_t Params = {&Device};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olGetHostDeviceWithCodeLoc(ol_device_handle_t *Device,
+                                       ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olGetHostDevice(Device);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olMemAlloc_val(ol_device_handle_t Device, ol_alloc_type_t Type,
+                                size_t Size, void **AllocationOut) {
+  if (offloadConfig().ValidationEnabled) {
+    if (Size == 0) {
+      return OL_ERRC_INVALID_SIZE;
+    }
+
+    if (NULL == Device) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == AllocationOut) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return llvm::offload::olMemAlloc_impl(Device, Type, Size, AllocationOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olMemAlloc(ol_device_handle_t Device,
+                                               ol_alloc_type_t Type,
+                                               size_t Size,
+                                               void **AllocationOut) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olMemAlloc";
+  }
+
+  ol_result_t Result = olMemAlloc_val(Device, Type, Size, AllocationOut);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_mem_alloc_params_t Params = {&Device, &Type, &Size, &AllocationOut};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olMemAllocWithCodeLoc(ol_device_handle_t Device,
+                                  ol_alloc_type_t Type, size_t Size,
+                                  void **AllocationOut,
+                                  ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olMemAlloc(Device, Type, Size, AllocationOut);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olMemFree_val(ol_device_handle_t Device, ol_alloc_type_t Type,
+                               void *Address) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Device) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == Address) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return llvm::offload::olMemFree_impl(Device, Type, Address);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olMemFree(ol_device_handle_t Device,
+                                              ol_alloc_type_t Type,
+                                              void *Address) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olMemFree";
+  }
+
+  ol_result_t Result = olMemFree_val(Device, Type, Address);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_mem_free_params_t Params = {&Device, &Type, &Address};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olMemFreeWithCodeLoc(ol_device_handle_t Device,
+                                 ol_alloc_type_t Type, void *Address,
+                                 ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olMemFree(Device, Type, Address);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olCreateQueue_val(ol_device_handle_t Device,
+                                   ol_queue_handle_t *Queue) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Device) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == Queue) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return llvm::offload::olCreateQueue_impl(Device, Queue);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olCreateQueue(ol_device_handle_t Device,
+                                                  ol_queue_handle_t *Queue) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olCreateQueue";
+  }
+
+  ol_result_t Result = olCreateQueue_val(Device, Queue);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_create_queue_params_t Params = {&Device, &Queue};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olCreateQueueWithCodeLoc(ol_device_handle_t Device,
+                                     ol_queue_handle_t *Queue,
+                                     ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olCreateQueue(Device, Queue);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olRetainQueue_val(ol_queue_handle_t Queue) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Queue) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+  }
+
+  return llvm::offload::olRetainQueue_impl(Queue);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olRetainQueue(ol_queue_handle_t Queue) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olRetainQueue";
+  }
+
+  ol_result_t Result = olRetainQueue_val(Queue);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_retain_queue_params_t Params = {&Queue};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olRetainQueueWithCodeLoc(ol_queue_handle_t Queue,
+                                     ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olRetainQueue(Queue);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olReleaseQueue_val(ol_queue_handle_t Queue) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Queue) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+  }
+
+  return llvm::offload::olReleaseQueue_impl(Queue);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueue(ol_queue_handle_t Queue) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olReleaseQueue";
+  }
+
+  ol_result_t Result = olReleaseQueue_val(Queue);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_release_queue_params_t Params = {&Queue};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olReleaseQueueWithCodeLoc(ol_queue_handle_t Queue,
+                                      ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olReleaseQueue(Queue);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olWaitQueue_val(ol_queue_handle_t Queue) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Queue) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+  }
+
+  return llvm::offload::olWaitQueue_impl(Queue);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olWaitQueue(ol_queue_handle_t Queue) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olWaitQueue";
+  }
+
+  ol_result_t Result = olWaitQueue_val(Queue);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_wait_queue_params_t Params = {&Queue};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olWaitQueueWithCodeLoc(ol_queue_handle_t Queue,
+                                   ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olWaitQueue(Queue);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olRetainEvent_val(ol_event_handle_t Event) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Event) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+  }
+
+  return llvm::offload::olRetainEvent_impl(Event);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(ol_event_handle_t Event) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olRetainEvent";
+  }
+
+  ol_result_t Result = olRetainEvent_val(Event);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_retain_event_params_t Params = {&Event};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olRetainEventWithCodeLoc(ol_event_handle_t Event,
+                                     ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olRetainEvent(Event);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olReleaseEvent_val(ol_event_handle_t Event) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Event) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+  }
+
+  return llvm::offload::olReleaseEvent_impl(Event);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseEvent(ol_event_handle_t Event) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olReleaseEvent";
+  }
+
+  ol_result_t Result = olReleaseEvent_val(Event);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_release_event_params_t Params = {&Event};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olReleaseEventWithCodeLoc(ol_event_handle_t Event,
+                                      ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olReleaseEvent(Event);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olWaitEvent_val(ol_event_handle_t Event) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Event) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+  }
+
+  return llvm::offload::olWaitEvent_impl(Event);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(ol_event_handle_t Event) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olWaitEvent";
+  }
+
+  ol_result_t Result = olWaitEvent_val(Event);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_wait_event_params_t Params = {&Event};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olWaitEventWithCodeLoc(ol_event_handle_t Event,
+                                   ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olWaitEvent(Event);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olEnqueueMemcpy_val(ol_queue_handle_t Queue, void *DstPtr,
+                                     ol_device_handle_t DstDevice, void *SrcPtr,
+                                     ol_device_handle_t SrcDevice, size_t Size,
+                                     ol_event_handle_t *EventOut) {
+  if (offloadConfig().ValidationEnabled) {
+    if (Size == 0) {
+      return OL_ERRC_INVALID_SIZE;
+    }
+
+    if (NULL == Queue) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == DstDevice) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == SrcDevice) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == DstPtr) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+
+    if (NULL == SrcPtr) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return llvm::offload::olEnqueueMemcpy_impl(Queue, DstPtr, DstDevice, SrcPtr,
+                                             SrcDevice, Size, EventOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpy(
+    ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice,
+    void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size,
+    ol_event_handle_t *EventOut) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olEnqueueMemcpy";
+  }
+
+  ol_result_t Result = olEnqueueMemcpy_val(Queue, DstPtr, DstDevice, SrcPtr,
+                                           SrcDevice, Size, EventOut);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_enqueue_memcpy_params_t Params = {
+        &Queue, &DstPtr, &DstDevice, &SrcPtr, &SrcDevice, &Size, &EventOut};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olEnqueueMemcpyWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
+                                       ol_device_handle_t DstDevice,
+                                       void *SrcPtr,
+                                       ol_device_handle_t SrcDevice,
+                                       size_t Size, ol_event_handle_t *EventOut,
+                                       ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olEnqueueMemcpy(Queue, DstPtr, DstDevice, SrcPtr,
+                                         SrcDevice, Size, EventOut);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t
+olEnqueueKernelLaunch_val(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+                          const void *ArgumentsData, size_t ArgumentsSize,
+                          const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+                          ol_event_handle_t *EventOut) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Queue) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == Kernel) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == ArgumentsData) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+
+    if (NULL == LaunchSizeArgs) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return llvm::offload::olEnqueueKernelLaunch_impl(
+      Queue, Kernel, ArgumentsData, ArgumentsSize, LaunchSizeArgs, EventOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olEnqueueKernelLaunch(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+                      const void *ArgumentsData, size_t ArgumentsSize,
+                      const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+                      ol_event_handle_t *EventOut) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olEnqueueKernelLaunch";
+  }
+
+  ol_result_t Result = olEnqueueKernelLaunch_val(
+      Queue, Kernel, ArgumentsData, ArgumentsSize, LaunchSizeArgs, EventOut);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_enqueue_kernel_launch_params_t Params = {&Queue,          &Kernel,
+                                                &ArgumentsData,  &ArgumentsSize,
+                                                &LaunchSizeArgs, &EventOut};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olEnqueueKernelLaunchWithCodeLoc(
+    ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+    const void *ArgumentsData, size_t ArgumentsSize,
+    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olEnqueueKernelLaunch(
+      Queue, Kernel, ArgumentsData, ArgumentsSize, LaunchSizeArgs, EventOut);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olCreateProgram_val(ol_device_handle_t Device,
+                                     const void *ProgData, size_t ProgDataSize,
+                                     ol_program_handle_t *Program) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Device) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == ProgData) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+
+    if (NULL == Program) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return llvm::offload::olCreateProgram_impl(Device, ProgData, ProgDataSize,
+                                             Program);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olCreateProgram(ol_device_handle_t Device, const void *ProgData,
+                size_t ProgDataSize, ol_program_handle_t *Program) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olCreateProgram";
+  }
+
+  ol_result_t Result =
+      olCreateProgram_val(Device, ProgData, ProgDataSize, Program);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_create_program_params_t Params = {&Device, &ProgData, &ProgDataSize,
+                                         &Program};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olCreateProgramWithCodeLoc(ol_device_handle_t Device,
+                                       const void *ProgData,
+                                       size_t ProgDataSize,
+                                       ol_program_handle_t *Program,
+                                       ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result =
+      ::olCreateProgram(Device, ProgData, ProgDataSize, Program);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olRetainProgram_val(ol_program_handle_t Program) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Program) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+  }
+
+  return llvm::offload::olRetainProgram_impl(Program);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olRetainProgram(ol_program_handle_t Program) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olRetainProgram";
+  }
+
+  ol_result_t Result = olRetainProgram_val(Program);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_retain_program_params_t Params = {&Program};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olRetainProgramWithCodeLoc(ol_program_handle_t Program,
+                                       ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olRetainProgram(Program);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olReleaseProgram_val(ol_program_handle_t Program) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Program) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+  }
+
+  return llvm::offload::olReleaseProgram_impl(Program);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olReleaseProgram(ol_program_handle_t Program) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olReleaseProgram";
+  }
+
+  ol_result_t Result = olReleaseProgram_val(Program);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_release_program_params_t Params = {&Program};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olReleaseProgramWithCodeLoc(ol_program_handle_t Program,
+                                        ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olReleaseProgram(Program);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olCreateKernel_val(ol_program_handle_t Program,
+                                    const char *KernelName,
+                                    ol_kernel_handle_t *Kernel) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Program) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == KernelName) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+
+    if (NULL == Kernel) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return llvm::offload::olCreateKernel_impl(Program, KernelName, Kernel);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(ol_program_handle_t Program,
+                                                   const char *KernelName,
+                                                   ol_kernel_handle_t *Kernel) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olCreateKernel";
+  }
+
+  ol_result_t Result = olCreateKernel_val(Program, KernelName, Kernel);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_create_kernel_params_t Params = {&Program, &KernelName, &Kernel};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olCreateKernelWithCodeLoc(ol_program_handle_t Program,
+                                      const char *KernelName,
+                                      ol_kernel_handle_t *Kernel,
+                                      ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olCreateKernel(Program, KernelName, Kernel);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olRetainKernel_val(ol_kernel_handle_t Kernel) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Kernel) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+  }
+
+  return llvm::offload::olRetainKernel_impl(Kernel);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(ol_kernel_handle_t Kernel) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olRetainKernel";
+  }
+
+  ol_result_t Result = olRetainKernel_val(Kernel);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_retain_kernel_params_t Params = {&Kernel};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olRetainKernelWithCodeLoc(ol_kernel_handle_t Kernel,
+                                      ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olRetainKernel(Kernel);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olReleaseKernel_val(ol_kernel_handle_t Kernel) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == Kernel) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+  }
+
+  return llvm::offload::olReleaseKernel_impl(Kernel);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernel(ol_kernel_handle_t Kernel) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olReleaseKernel";
+  }
+
+  ol_result_t Result = olReleaseKernel_val(Kernel);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_release_kernel_params_t Params = {&Kernel};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olReleaseKernelWithCodeLoc(ol_kernel_handle_t Kernel,
+                                       ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olReleaseKernel(Kernel);
 
   currentCodeLocation() = nullptr;
   return Result;
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index 48115493c790f..c504de68a778e 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -20,6 +20,24 @@ OFFLOAD_FUNC(olGetDeviceCount)
 OFFLOAD_FUNC(olGetDevice)
 OFFLOAD_FUNC(olGetDeviceInfo)
 OFFLOAD_FUNC(olGetDeviceInfoSize)
+OFFLOAD_FUNC(olGetHostDevice)
+OFFLOAD_FUNC(olMemAlloc)
+OFFLOAD_FUNC(olMemFree)
+OFFLOAD_FUNC(olCreateQueue)
+OFFLOAD_FUNC(olRetainQueue)
+OFFLOAD_FUNC(olReleaseQueue)
+OFFLOAD_FUNC(olWaitQueue)
+OFFLOAD_FUNC(olRetainEvent)
+OFFLOAD_FUNC(olReleaseEvent)
+OFFLOAD_FUNC(olWaitEvent)
+OFFLOAD_FUNC(olEnqueueMemcpy)
+OFFLOAD_FUNC(olEnqueueKernelLaunch)
+OFFLOAD_FUNC(olCreateProgram)
+OFFLOAD_FUNC(olRetainProgram)
+OFFLOAD_FUNC(olReleaseProgram)
+OFFLOAD_FUNC(olCreateKernel)
+OFFLOAD_FUNC(olRetainKernel)
+OFFLOAD_FUNC(olReleaseKernel)
 OFFLOAD_FUNC(olInitWithCodeLoc)
 OFFLOAD_FUNC(olShutDownWithCodeLoc)
 OFFLOAD_FUNC(olGetPlatformWithCodeLoc)
@@ -30,5 +48,23 @@ OFFLOAD_FUNC(olGetDeviceCountWithCodeLoc)
 OFFLOAD_FUNC(olGetDeviceWithCodeLoc)
 OFFLOAD_FUNC(olGetDeviceInfoWithCodeLoc)
 OFFLOAD_FUNC(olGetDeviceInfoSizeWithCodeLoc)
+OFFLOAD_FUNC(olGetHostDeviceWithCodeLoc)
+OFFLOAD_FUNC(olMemAllocWithCodeLoc)
+OFFLOAD_FUNC(olMemFreeWithCodeLoc)
+OFFLOAD_FUNC(olCreateQueueWithCodeLoc)
+OFFLOAD_FUNC(olRetainQueueWithCodeLoc)
+OFFLOAD_FUNC(olReleaseQueueWithCodeLoc)
+OFFLOAD_FUNC(olWaitQueueWithCodeLoc)
+OFFLOAD_FUNC(olRetainEventWithCodeLoc)
+OFFLOAD_FUNC(olReleaseEventWithCodeLoc)
+OFFLOAD_FUNC(olWaitEventWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueMemcpyWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueKernelLaunchWithCodeLoc)
+OFFLOAD_FUNC(olCreateProgramWithCodeLoc)
+OFFLOAD_FUNC(olRetainProgramWithCodeLoc)
+OFFLOAD_FUNC(olReleaseProgramWithCodeLoc)
+OFFLOAD_FUNC(olCreateKernelWithCodeLoc)
+OFFLOAD_FUNC(olRetainKernelWithCodeLoc)
+OFFLOAD_FUNC(olReleaseKernelWithCodeLoc)
 
 #undef OFFLOAD_FUNC
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 5b26b2653a05d..d6be874613cf5 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -36,3 +36,55 @@ ol_impl_result_t olGetDeviceInfo_impl(ol_device_handle_t Device,
 ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
                                           ol_device_info_t PropName,
                                           size_t *PropSizeRet);
+
+ol_impl_result_t olGetHostDevice_impl(ol_device_handle_t *Device);
+
+ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
+                                 ol_alloc_type_t Type, size_t Size,
+                                 void **AllocationOut);
+
+ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
+                                void *Address);
+
+ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
+                                    ol_queue_handle_t *Queue);
+
+ol_impl_result_t olRetainQueue_impl(ol_queue_handle_t Queue);
+
+ol_impl_result_t olReleaseQueue_impl(ol_queue_handle_t Queue);
+
+ol_impl_result_t olWaitQueue_impl(ol_queue_handle_t Queue);
+
+ol_impl_result_t olRetainEvent_impl(ol_event_handle_t Event);
+
+ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event);
+
+ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event);
+
+ol_impl_result_t olEnqueueMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
+                                      ol_device_handle_t DstDevice,
+                                      void *SrcPtr,
+                                      ol_device_handle_t SrcDevice, size_t Size,
+                                      ol_event_handle_t *EventOut);
+
+ol_impl_result_t
+olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+                           const void *ArgumentsData, size_t ArgumentsSize,
+                           const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+                           ol_event_handle_t *EventOut);
+
+ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device,
+                                      const void *ProgData, size_t ProgDataSize,
+                                      ol_program_handle_t *Program);
+
+ol_impl_result_t olRetainProgram_impl(ol_program_handle_t Program);
+
+ol_impl_result_t olReleaseProgram_impl(ol_program_handle_t Program);
+
+ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
+                                     const char *KernelName,
+                                     ol_kernel_handle_t *Kernel);
+
+ol_impl_result_t olRetainKernel_impl(ol_kernel_handle_t Kernel);
+
+ol_impl_result_t olReleaseKernel_impl(ol_kernel_handle_t Kernel);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 8981bb054a4cb..662c3e462d3d4 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -11,31 +11,41 @@
 #pragma once
 
 #include <OffloadAPI.h>
-#include <ostream>
+#include <llvm/Support/raw_ostream.h>
 
 template <typename T>
-inline ol_result_t printPtr(std::ostream &os, const T *ptr);
+inline ol_result_t printPtr(llvm::raw_ostream &os, const T *ptr);
 template <typename T>
-inline void printTagged(std::ostream &os, const void *ptr, T value,
+inline void printTagged(llvm::raw_ostream &os, const void *ptr, T value,
                         size_t size);
 template <typename T> struct is_handle : std::false_type {};
 template <> struct is_handle<ol_platform_handle_t> : std::true_type {};
 template <> struct is_handle<ol_device_handle_t> : std::true_type {};
 template <> struct is_handle<ol_context_handle_t> : std::true_type {};
+template <> struct is_handle<ol_queue_handle_t> : std::true_type {};
+template <> struct is_handle<ol_event_handle_t> : std::true_type {};
+template <> struct is_handle<ol_program_handle_t> : std::true_type {};
+template <> struct is_handle<ol_kernel_handle_t> : std::true_type {};
 template <typename T> inline constexpr bool is_handle_v = is_handle<T>::value;
 
-inline std::ostream &operator<<(std::ostream &os, enum ol_errc_t value);
-inline std::ostream &operator<<(std::ostream &os,
-                                enum ol_platform_info_t value);
-inline std::ostream &operator<<(std::ostream &os,
-                                enum ol_platform_backend_t value);
-inline std::ostream &operator<<(std::ostream &os, enum ol_device_type_t value);
-inline std::ostream &operator<<(std::ostream &os, enum ol_device_info_t value);
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_errc_t value);
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_platform_info_t value);
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_platform_backend_t value);
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_device_type_t value);
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_device_info_t value);
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_alloc_type_t value);
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the ol_errc_t type
-/// @returns std::ostream &
-inline std::ostream &operator<<(std::ostream &os, enum ol_errc_t value) {
+/// @returns llvm::raw_ostream &
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_errc_t value) {
   switch (value) {
   case OL_ERRC_SUCCESS:
     os << "OL_ERRC_SUCCESS";
@@ -46,24 +56,21 @@ inline std::ostream &operator<<(std::ostream &os, enum ol_errc_t value) {
   case OL_ERRC_INVALID_PLATFORM:
     os << "OL_ERRC_INVALID_PLATFORM";
     break;
-  case OL_ERRC_DEVICE_NOT_FOUND:
-    os << "OL_ERRC_DEVICE_NOT_FOUND";
-    break;
   case OL_ERRC_INVALID_DEVICE:
     os << "OL_ERRC_INVALID_DEVICE";
     break;
-  case OL_ERRC_DEVICE_LOST:
-    os << "OL_ERRC_DEVICE_LOST";
+  case OL_ERRC_INVALID_QUEUE:
+    os << "OL_ERRC_INVALID_QUEUE";
+    break;
+  case OL_ERRC_INVALID_EVENT:
+    os << "OL_ERRC_INVALID_EVENT";
     break;
-  case OL_ERRC_UNINITIALIZED:
-    os << "OL_ERRC_UNINITIALIZED";
+  case OL_ERRC_INVALID_KERNEL_NAME:
+    os << "OL_ERRC_INVALID_KERNEL_NAME";
     break;
   case OL_ERRC_OUT_OF_RESOURCES:
     os << "OL_ERRC_OUT_OF_RESOURCES";
     break;
-  case OL_ERRC_UNSUPPORTED_VERSION:
-    os << "OL_ERRC_UNSUPPORTED_VERSION";
-    break;
   case OL_ERRC_UNSUPPORTED_FEATURE:
     os << "OL_ERRC_UNSUPPORTED_FEATURE";
     break;
@@ -97,9 +104,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ol_errc_t value) {
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the ol_platform_info_t type
-/// @returns std::ostream &
-inline std::ostream &operator<<(std::ostream &os,
-                                enum ol_platform_info_t value) {
+/// @returns llvm::raw_ostream &
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_platform_info_t value) {
   switch (value) {
   case OL_PLATFORM_INFO_NAME:
     os << "OL_PLATFORM_INFO_NAME";
@@ -122,9 +129,9 @@ inline std::ostream &operator<<(std::ostream &os,
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print type-tagged ol_platform_info_t enum value
-/// @returns std::ostream &
+/// @returns nothing (the value is printed to os)
 template <>
-inline void printTagged(std::ostream &os, const void *ptr,
+inline void printTagged(llvm::raw_ostream &os, const void *ptr,
                         ol_platform_info_t value, size_t size) {
   if (ptr == NULL) {
     printPtr(os, ptr);
@@ -159,9 +166,9 @@ inline void printTagged(std::ostream &os, const void *ptr,
 }
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the ol_platform_backend_t type
-/// @returns std::ostream &
-inline std::ostream &operator<<(std::ostream &os,
-                                enum ol_platform_backend_t value) {
+/// @returns llvm::raw_ostream &
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_platform_backend_t value) {
   switch (value) {
   case OL_PLATFORM_BACKEND_UNKNOWN:
     os << "OL_PLATFORM_BACKEND_UNKNOWN";
@@ -181,8 +188,9 @@ inline std::ostream &operator<<(std::ostream &os,
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the ol_device_type_t type
-/// @returns std::ostream &
-inline std::ostream &operator<<(std::ostream &os, enum ol_device_type_t value) {
+/// @returns llvm::raw_ostream &
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_device_type_t value) {
   switch (value) {
   case OL_DEVICE_TYPE_DEFAULT:
     os << "OL_DEVICE_TYPE_DEFAULT";
@@ -205,8 +213,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ol_device_type_t value) {
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the ol_device_info_t type
-/// @returns std::ostream &
-inline std::ostream &operator<<(std::ostream &os, enum ol_device_info_t value) {
+/// @returns llvm::raw_ostream &
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_device_info_t value) {
   switch (value) {
   case OL_DEVICE_INFO_TYPE:
     os << "OL_DEVICE_INFO_TYPE";
@@ -232,9 +241,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ol_device_info_t value) {
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print type-tagged ol_device_info_t enum value
-/// @returns std::ostream &
+/// @returns nothing (the value is printed to os)
 template <>
-inline void printTagged(std::ostream &os, const void *ptr,
+inline void printTagged(llvm::raw_ostream &os, const void *ptr,
                         ol_device_info_t value, size_t size) {
   if (ptr == NULL) {
     printPtr(os, ptr);
@@ -274,9 +283,30 @@ inline void printTagged(std::ostream &os, const void *ptr,
     break;
   }
 }
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print operator for the ol_alloc_type_t type
+/// @returns llvm::raw_ostream &
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     enum ol_alloc_type_t value) {
+  switch (value) {
+  case OL_ALLOC_TYPE_HOST:
+    os << "OL_ALLOC_TYPE_HOST";
+    break;
+  case OL_ALLOC_TYPE_DEVICE:
+    os << "OL_ALLOC_TYPE_DEVICE";
+    break;
+  case OL_ALLOC_TYPE_SHARED:
+    os << "OL_ALLOC_TYPE_SHARED";
+    break;
+  default:
+    os << "unknown enumerator";
+    break;
+  }
+  return os;
+}
 
-inline std::ostream &operator<<(std::ostream &os,
-                                const ol_error_struct_t *Err) {
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     const ol_error_struct_t *Err) {
   if (Err == nullptr) {
     os << "OL_SUCCESS";
   } else {
@@ -284,9 +314,62 @@ inline std::ostream &operator<<(std::ostream &os,
   }
   return os;
 }
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print operator for the ol_code_location_t type
+/// @returns llvm::raw_ostream &
+
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     const struct ol_code_location_t params) {
+  os << "(struct ol_code_location_t){";
+  os << ".FunctionName = ";
+  printPtr(os, params.FunctionName);
+  os << ", ";
+  os << ".SourceFile = ";
+  printPtr(os, params.SourceFile);
+  os << ", ";
+  os << ".LineNumber = ";
+  os << params.LineNumber;
+  os << ", ";
+  os << ".ColumnNumber = ";
+  os << params.ColumnNumber;
+  os << "}";
+  return os;
+}
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print operator for the ol_kernel_launch_size_args_t type
+/// @returns llvm::raw_ostream &
+
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_kernel_launch_size_args_t params) {
+  os << "(struct ol_kernel_launch_size_args_t){";
+  os << ".Dimensions = ";
+  os << params.Dimensions;
+  os << ", ";
+  os << ".NumGroupsX = ";
+  os << params.NumGroupsX;
+  os << ", ";
+  os << ".NumGroupsY = ";
+  os << params.NumGroupsY;
+  os << ", ";
+  os << ".NumGroupsZ = ";
+  os << params.NumGroupsZ;
+  os << ", ";
+  os << ".GroupSizeX = ";
+  os << params.GroupSizeX;
+  os << ", ";
+  os << ".GroupSizeY = ";
+  os << params.GroupSizeY;
+  os << ", ";
+  os << ".GroupSizeZ = ";
+  os << params.GroupSizeZ;
+  os << "}";
+  return os;
+}
 
-inline std::ostream &operator<<(std::ostream &os,
-                                const struct ol_get_platform_params_t *params) {
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_get_platform_params_t *params) {
   os << ".NumEntries = ";
   os << *params->pNumEntries;
   os << ", ";
@@ -302,16 +385,16 @@ inline std::ostream &operator<<(std::ostream &os,
   return os;
 }
 
-inline std::ostream &
-operator<<(std::ostream &os,
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
            const struct ol_get_platform_count_params_t *params) {
   os << ".NumPlatforms = ";
   printPtr(os, *params->pNumPlatforms);
   return os;
 }
 
-inline std::ostream &
-operator<<(std::ostream &os,
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
            const struct ol_get_platform_info_params_t *params) {
   os << ".Platform = ";
   printPtr(os, *params->pPlatform);
@@ -327,8 +410,8 @@ operator<<(std::ostream &os,
   return os;
 }
 
-inline std::ostream &
-operator<<(std::ostream &os,
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
            const struct ol_get_platform_info_size_params_t *params) {
   os << ".Platform = ";
   printPtr(os, *params->pPlatform);
@@ -341,8 +424,8 @@ operator<<(std::ostream &os,
   return os;
 }
 
-inline std::ostream &
-operator<<(std::ostream &os,
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
            const struct ol_get_device_count_params_t *params) {
   os << ".Platform = ";
   printPtr(os, *params->pPlatform);
@@ -352,8 +435,8 @@ operator<<(std::ostream &os,
   return os;
 }
 
-inline std::ostream &operator<<(std::ostream &os,
-                                const struct ol_get_device_params_t *params) {
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os, const struct ol_get_device_params_t *params) {
   os << ".Platform = ";
   printPtr(os, *params->pPlatform);
   os << ", ";
@@ -372,8 +455,9 @@ inline std::ostream &operator<<(std::ostream &os,
   return os;
 }
 
-inline std::ostream &
-operator<<(std::ostream &os, const struct ol_get_device_info_params_t *params) {
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_get_device_info_params_t *params) {
   os << ".Device = ";
   printPtr(os, *params->pDevice);
   os << ", ";
@@ -388,8 +472,8 @@ operator<<(std::ostream &os, const struct ol_get_device_info_params_t *params) {
   return os;
 }
 
-inline std::ostream &
-operator<<(std::ostream &os,
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
            const struct ol_get_device_info_size_params_t *params) {
   os << ".Device = ";
   printPtr(os, *params->pDevice);
@@ -402,10 +486,216 @@ operator<<(std::ostream &os,
   return os;
 }
 
+// Prints the single field of an ol_get_host_device_params_t as
+// ".Device = <pointer>".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_get_host_device_params_t *params) {
+  const auto DeviceArg = *params->pDevice;
+  os << ".Device = ";
+  printPtr(os, DeviceArg);
+  return os;
+}
+
+// Prints every field of an ol_mem_alloc_params_t as ".Name = value" pairs
+// separated by ", ". Pointer-like fields go through printPtr().
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os, const struct ol_mem_alloc_params_t *params) {
+  os << ".Device = ";
+  printPtr(os, *params->pDevice);
+  os << ", " << ".Type = " << *params->pType;
+  os << ", " << ".Size = " << *params->pSize;
+  os << ", " << ".AllocationOut = ";
+  printPtr(os, *params->pAllocationOut);
+  return os;
+}
+
+// Prints every field of an ol_mem_free_params_t as ".Name = value" pairs
+// separated by ", ". Pointer-like fields go through printPtr().
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os, const struct ol_mem_free_params_t *params) {
+  os << ".Device = ";
+  printPtr(os, *params->pDevice);
+  os << ", " << ".Type = " << *params->pType;
+  os << ", " << ".Address = ";
+  printPtr(os, *params->pAddress);
+  return os;
+}
+
+// Prints both fields of an ol_create_queue_params_t as ".Name = value" pairs
+// separated by ", ".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_create_queue_params_t *params) {
+  os << ".Device = ";
+  printPtr(os, *params->pDevice);
+  os << ", " << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  return os;
+}
+
+// Prints the single field of an ol_retain_queue_params_t as
+// ".Queue = <pointer>".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_retain_queue_params_t *params) {
+  const auto QueueArg = *params->pQueue;
+  os << ".Queue = ";
+  printPtr(os, QueueArg);
+  return os;
+}
+
+// Prints the single field of an ol_release_queue_params_t as
+// ".Queue = <pointer>".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_release_queue_params_t *params) {
+  const auto QueueArg = *params->pQueue;
+  os << ".Queue = ";
+  printPtr(os, QueueArg);
+  return os;
+}
+
+// Prints the single field of an ol_wait_queue_params_t as
+// ".Queue = <pointer>".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os, const struct ol_wait_queue_params_t *params) {
+  const auto QueueArg = *params->pQueue;
+  os << ".Queue = ";
+  printPtr(os, QueueArg);
+  return os;
+}
+
+// Prints the single field of an ol_retain_event_params_t as
+// ".Event = <pointer>".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_retain_event_params_t *params) {
+  const auto EventArg = *params->pEvent;
+  os << ".Event = ";
+  printPtr(os, EventArg);
+  return os;
+}
+
+// Prints the single field of an ol_release_event_params_t as
+// ".Event = <pointer>".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_release_event_params_t *params) {
+  const auto EventArg = *params->pEvent;
+  os << ".Event = ";
+  printPtr(os, EventArg);
+  return os;
+}
+
+// Prints the single field of an ol_wait_event_params_t as
+// ".Event = <pointer>".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os, const struct ol_wait_event_params_t *params) {
+  const auto EventArg = *params->pEvent;
+  os << ".Event = ";
+  printPtr(os, EventArg);
+  return os;
+}
+
+// Prints every field of an ol_enqueue_memcpy_params_t as ".Name = value" pairs
+// separated by ", ". Pointer-like fields go through printPtr(); Size is
+// streamed directly.
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_enqueue_memcpy_params_t *params) {
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  os << ", " << ".DstPtr = ";
+  printPtr(os, *params->pDstPtr);
+  os << ", " << ".DstDevice = ";
+  printPtr(os, *params->pDstDevice);
+  os << ", " << ".SrcPtr = ";
+  printPtr(os, *params->pSrcPtr);
+  os << ", " << ".SrcDevice = ";
+  printPtr(os, *params->pSrcDevice);
+  os << ", " << ".Size = " << *params->pSize;
+  os << ", " << ".EventOut = ";
+  printPtr(os, *params->pEventOut);
+  return os;
+}
+
+// Prints every field of an ol_enqueue_kernel_launch_params_t as
+// ".Name = value" pairs separated by ", ". Pointer-like fields go through
+// printPtr(); ArgumentsSize is streamed directly.
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_enqueue_kernel_launch_params_t *params) {
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  os << ", " << ".Kernel = ";
+  printPtr(os, *params->pKernel);
+  os << ", " << ".ArgumentsData = ";
+  printPtr(os, *params->pArgumentsData);
+  os << ", " << ".ArgumentsSize = " << *params->pArgumentsSize;
+  os << ", " << ".LaunchSizeArgs = ";
+  printPtr(os, *params->pLaunchSizeArgs);
+  os << ", " << ".EventOut = ";
+  printPtr(os, *params->pEventOut);
+  return os;
+}
+
+// Prints every field of an ol_create_program_params_t as ".Name = value"
+// pairs separated by ", ". Pointer-like fields go through printPtr();
+// ProgDataSize is streamed directly.
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_create_program_params_t *params) {
+  os << ".Device = ";
+  printPtr(os, *params->pDevice);
+  os << ", " << ".ProgData = ";
+  printPtr(os, *params->pProgData);
+  os << ", " << ".ProgDataSize = " << *params->pProgDataSize;
+  os << ", " << ".Program = ";
+  printPtr(os, *params->pProgram);
+  return os;
+}
+
+// Prints the single field of an ol_retain_program_params_t as
+// ".Program = <pointer>".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_retain_program_params_t *params) {
+  const auto ProgramArg = *params->pProgram;
+  os << ".Program = ";
+  printPtr(os, ProgramArg);
+  return os;
+}
+
+// Prints the single field of an ol_release_program_params_t as
+// ".Program = <pointer>".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_release_program_params_t *params) {
+  const auto ProgramArg = *params->pProgram;
+  os << ".Program = ";
+  printPtr(os, ProgramArg);
+  return os;
+}
+
+// Prints every field of an ol_create_kernel_params_t as ".Name = value" pairs
+// separated by ", ". All three fields go through printPtr().
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_create_kernel_params_t *params) {
+  os << ".Program = ";
+  printPtr(os, *params->pProgram);
+  os << ", " << ".KernelName = ";
+  printPtr(os, *params->pKernelName);
+  os << ", " << ".Kernel = ";
+  printPtr(os, *params->pKernel);
+  return os;
+}
+
+// Prints the single field of an ol_retain_kernel_params_t as
+// ".Kernel = <pointer>".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_retain_kernel_params_t *params) {
+  const auto KernelArg = *params->pKernel;
+  os << ".Kernel = ";
+  printPtr(os, KernelArg);
+  return os;
+}
+
+// Prints the single field of an ol_release_kernel_params_t as
+// ".Kernel = <pointer>".
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_release_kernel_params_t *params) {
+  const auto KernelArg = *params->pKernel;
+  os << ".Kernel = ";
+  printPtr(os, KernelArg);
+  return os;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // @brief Print pointer value
 template <typename T>
-inline ol_result_t printPtr(std::ostream &os, const T *ptr) {
+inline ol_result_t printPtr(llvm::raw_ostream &os, const T *ptr) {
   if (ptr == nullptr) {
     os << "nullptr";
   } else if constexpr (std::is_pointer_v<T>) {
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 457f1053f1634..9e81d905d0216 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -19,27 +19,6 @@
 
 #include <mutex>
 
-using namespace llvm;
-using namespace llvm::omp::target::plugin;
-
-// Handle type definitions. Ideally these would be 1:1 with the plugins
-struct ol_device_handle_t_ {
-  int DeviceNum;
-  GenericDeviceTy &Device;
-  ol_platform_handle_t Platform;
-};
-
-struct ol_platform_handle_t_ {
-  std::unique_ptr<GenericPluginTy> Plugin;
-  std::vector<ol_device_handle_t_> Devices;
-};
-
-using PlatformVecT = SmallVector<ol_platform_handle_t_, 4>;
-PlatformVecT &Platforms() {
-  static PlatformVecT Platforms;
-  return Platforms;
-}
-
 // TODO: Some plugins expect to be linked into libomptarget which defines these
 // symbols to implement ompt callbacks. The least invasive workaround here is to
 // define them in libLLVMOffload as false/null so they are never used. In future
@@ -55,6 +34,74 @@ ompt_function_lookup_t lookupCallbackByName = nullptr;
 } // namespace llvm::omp::target
 #endif
 
+using namespace llvm::omp::target;
+using namespace llvm::omp::target::plugin;
+
+// Handle type definitions. Ideally these would be 1:1 with the plugins, but
+// we add some additional data here for now to avoid churn in the plugin
+// interface.
+
+struct RefCounted { // Base of the handle types passed to olRetain()/olRelease().
+  std::atomic_uint32_t RefCount; // No default initializer here: code that creates a handle must set it.
+};
+
+struct ol_device_impl_t { // Object behind ol_device_handle_t; not reference counted.
+  int DeviceNum; // Index passed to the plugin's init_device()/getDevice(); -1 for the host device.
+  GenericDeviceTy *Device; // Plugin device object; nullptr for the host device.
+  ol_platform_handle_t Platform; // Owning platform; nullptr for the host device.
+};
+
+struct ol_platform_impl_t { // Object behind ol_platform_handle_t; one is created per plugin target.
+  std::unique_ptr<GenericPluginTy> Plugin; // Plugin instance returned by createPlugin_<Name>().
+  std::vector<ol_device_impl_t> Devices; // Devices whose init_device() call succeeded.
+};
+
+struct ol_queue_impl_t : RefCounted { // Object behind ol_queue_handle_t.
+  __tgt_async_info *AsyncInfo; // Set by initAsyncInfo(); re-created after every olWaitQueue().
+  ol_device_handle_t Device; // Device the queue was created for.
+};
+
+struct ol_event_impl_t : RefCounted { // Object behind ol_event_handle_t.
+  void *EventInfo; // Opaque plugin event filled in by createEvent().
+  ol_queue_handle_t Queue; // Queue the event was recorded on.
+};
+
+struct ol_program_impl_t : RefCounted { // Object behind ol_program_handle_t.
+  plugin::DeviceImageTy *Image; // Image returned by the plugin's loadBinary().
+  std::unique_ptr<llvm::MemoryBuffer> ImageData; // Private copy of the caller's program binary.
+  __tgt_device_image DeviceImage; // Descriptor whose start/end pointers refer to ImageData.
+};
+
+struct ol_kernel_impl_t : RefCounted { // Object behind ol_kernel_handle_t.
+  GenericKernelTy *KernelImpl; // Plugin kernel obtained from constructKernel(); not deleted by olRelease().
+};
+
+namespace llvm {
+namespace offload {
+
+// Container holding one ol_platform_impl_t per plugin target; it is filled by
+// initPlugins().
+using PlatformVecT = SmallVector<ol_platform_impl_t, 4>;
+
+// Returns the single platform list, which is constructed on first use.
+PlatformVecT &Platforms() {
+  static PlatformVecT PlatformList;
+  return PlatformList;
+}
+
+// Returns the handle of the one host device object. The host device carries
+// device number -1 and has neither a plugin device nor a platform attached.
+ol_device_handle_t HostDevice() {
+  static ol_device_impl_t Host{/*DeviceNum=*/-1, /*Device=*/nullptr,
+                               /*Platform=*/nullptr};
+  return &Host;
+}
+
+// Adds one reference to a handle whose implementation type derives from
+// RefCounted. Always returns OL_SUCCESS.
+template <typename HandleT> ol_impl_result_t olRetain(HandleT Handle) {
+  Handle->RefCount.fetch_add(1);
+  return OL_SUCCESS;
+}
+
+// Drops one reference from a handle whose implementation type derives from
+// RefCounted and deletes the object when the count reaches zero. Always
+// returns OL_SUCCESS.
+template <typename HandleT> ol_impl_result_t olRelease(HandleT Handle) {
+  // fetch_sub() yields the value before the decrement, so 1 means this call
+  // released the last reference.
+  const bool WasLastReference = Handle->RefCount.fetch_sub(1) == 1;
+  if (WasLastReference)
+    delete Handle;
+  return OL_SUCCESS;
+}
+
 // Every plugin exports this method to create an instance of the plugin type.
 #define PLUGIN_TARGET(Name) extern "C" GenericPluginTy *createPlugin_##Name();
 #include "Shared/Targets.def"
@@ -63,7 +110,7 @@ void initPlugins() {
   // Attempt to create an instance of each supported plugin.
 #define PLUGIN_TARGET(Name)                                                    \
   do {                                                                         \
-    Platforms().emplace_back(ol_platform_handle_t_{                            \
+    Platforms().emplace_back(ol_platform_impl_t{                               \
         std::unique_ptr<GenericPluginTy>(createPlugin_##Name()), {}});         \
   } while (false);
 #include "Shared/Targets.def"
@@ -76,13 +123,15 @@ void initPlugins() {
     for (auto DevNum = 0; DevNum < Platform.Plugin->number_of_devices();
          DevNum++) {
       if (Platform.Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) {
-        Platform.Devices.emplace_back(ol_device_handle_t_{
-            DevNum, Platform.Plugin->getDevice(DevNum), &Platform});
+        Platform.Devices.emplace_back(ol_device_impl_t{
+            DevNum, &Platform.Plugin->getDevice(DevNum), &Platform});
       }
     }
   }
 
   offloadConfig().TracingEnabled = std::getenv("OFFLOAD_TRACE");
+  offloadConfig().ValidationEnabled =
+      !std::getenv("OFFLOAD_DISABLE_VALIDATION");
 }
 
 // TODO: We can properly reference count here and manage the resources in a more
@@ -175,9 +224,8 @@ ol_impl_result_t olGetDeviceCount_impl(ol_platform_handle_t Platform,
 ol_impl_result_t olGetDevice_impl(ol_platform_handle_t Platform,
                                   uint32_t NumEntries,
                                   ol_device_handle_t *Devices) {
-  if (NumEntries > Platform->Devices.size()) {
+  if (NumEntries > Platform->Devices.size())
     return OL_ERRC_INVALID_SIZE;
-  }
 
   for (uint32_t DeviceIndex = 0; DeviceIndex < NumEntries; DeviceIndex++) {
     Devices[DeviceIndex] = &(Platform->Devices[DeviceIndex]);
@@ -194,7 +242,7 @@ ol_impl_result_t olGetDeviceInfoImplDetail(ol_device_handle_t Device,
   ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet);
 
   InfoQueueTy DevInfo;
-  if (auto Err = Device->Device.obtainInfoImpl(DevInfo))
+  if (auto Err = Device->Device->obtainInfoImpl(DevInfo))
     return OL_ERRC_OUT_OF_RESOURCES;
 
   // Find the info if it exists under any of the given names
@@ -245,3 +293,255 @@ ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
                                           size_t *PropSizeRet) {
   return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet);
 }
+
+// Writes the host device handle to *Device. Always returns OL_SUCCESS.
+ol_impl_result_t olGetHostDevice_impl(ol_device_handle_t *Device) {
+  ol_device_handle_t Host = HostDevice();
+  *Device = Host;
+  return OL_SUCCESS;
+}
+
+// Maps an Offload allocation type onto the plugin allocation kind.
+TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) {
+  if (Type == OL_ALLOC_TYPE_DEVICE)
+    return TARGET_ALLOC_DEVICE;
+  if (Type == OL_ALLOC_TYPE_HOST)
+    return TARGET_ALLOC_HOST;
+  // OL_ALLOC_TYPE_SHARED and any unrecognized value map to shared memory.
+  return TARGET_ALLOC_SHARED;
+}
+
+// Allocates Size bytes of the requested kind through the device's plugin and
+// stores the resulting pointer in *AllocationOut.
+// Returns OL_ERRC_OUT_OF_RESOURCES when the plugin reports a failure.
+ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
+                                 ol_alloc_type_t Type, size_t Size,
+                                 void **AllocationOut) {
+  auto Alloc =
+      Device->Device->dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
+  if (!Alloc) {
+    // A failed llvm::Expected has to have its error taken and handled before
+    // it is destroyed. The plugin's message is dropped in favour of the
+    // Offload error below.
+    consumeError(Alloc.takeError());
+    return {OL_ERRC_OUT_OF_RESOURCES,
+            formatv("Could not create allocation on device {0}", Device).str()};
+  }
+
+  *AllocationOut = *Alloc;
+  return OL_SUCCESS;
+}
+
+// Frees Address, an allocation of the given kind, through the device's
+// plugin. Returns OL_ERRC_OUT_OF_RESOURCES when the plugin reports a failure.
+ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
+                                void *Address) {
+  if (auto Err =
+          Device->Device->dataDelete(Address, convertOlToPluginAllocTy(Type))) {
+    // An llvm::Error in the failure state must be handled before it is
+    // destroyed; only the Offload error below is reported.
+    consumeError(std::move(Err));
+    return {OL_ERRC_OUT_OF_RESOURCES, "Could not free allocation"};
+  }
+
+  return OL_SUCCESS;
+}
+
+// Creates a queue on Device with a reference count of one and stores its
+// handle in *Queue. Returns OL_ERRC_UNKNOWN if the plugin cannot set up the
+// async info backing the queue; nothing is written to *Queue in that case.
+ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
+                                    ol_queue_handle_t *Queue) {
+  auto CreatedQueue = std::make_unique<ol_queue_impl_t>();
+  if (auto Err = Device->Device->initAsyncInfo(&(CreatedQueue->AsyncInfo))) {
+    // An llvm::Error in the failure state must be handled before it is
+    // destroyed; only the Offload error below is reported.
+    consumeError(std::move(Err));
+    return {OL_ERRC_UNKNOWN, "Could not initialize stream resource"};
+  }
+
+  CreatedQueue->Device = Device;
+  CreatedQueue->RefCount = 1;
+  // Ownership passes to the caller, who gives it back through olReleaseQueue.
+  *Queue = CreatedQueue.release();
+  return OL_SUCCESS;
+}
+
+// Adds one reference to Queue.
+ol_impl_result_t olRetainQueue_impl(ol_queue_handle_t Queue) {
+  return olRetain<ol_queue_handle_t>(Queue);
+}
+
+// Drops one reference from Queue; the queue object is deleted with the last
+// one.
+ol_impl_result_t olReleaseQueue_impl(ol_queue_handle_t Queue) {
+  return olRelease<ol_queue_handle_t>(Queue);
+}
+
+// Synchronizes Queue and then re-creates its async info so the queue can be
+// used again. Returns OL_ERRC_INVALID_QUEUE if synchronization fails and
+// OL_ERRC_UNKNOWN if the async info cannot be re-created.
+ol_impl_result_t olWaitQueue_impl(ol_queue_handle_t Queue) {
+  auto *DeviceImpl = Queue->Device->Device;
+
+  // Host plugin doesn't have a queue set so it's not safe to call synchronize
+  // on it, but we have nothing to synchronize in that situation anyway.
+  if (Queue->AsyncInfo->Queue) {
+    if (auto Err = DeviceImpl->synchronize(Queue->AsyncInfo)) {
+      // An llvm::Error in the failure state must be handled before it is
+      // destroyed; only the Offload error below is reported.
+      consumeError(std::move(Err));
+      return {OL_ERRC_INVALID_QUEUE, "The queue failed to synchronize"};
+    }
+  }
+
+  // Recreate the stream resource so the queue can be reused
+  // TODO: Would be easier for the synchronization to (optionally) not release
+  // it to begin with.
+  if (auto Err = DeviceImpl->initAsyncInfo(&Queue->AsyncInfo)) {
+    consumeError(std::move(Err));
+    return {OL_ERRC_UNKNOWN, "Could not reinitialize the stream resource"};
+  }
+
+  return OL_SUCCESS;
+}
+
+// Waits for Event through the device of the queue it was recorded on.
+// Returns OL_ERRC_INVALID_EVENT when the plugin reports a failure.
+ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event) {
+  auto *DeviceImpl = Event->Queue->Device->Device;
+  if (auto Err = DeviceImpl->syncEvent(Event->EventInfo)) {
+    // An llvm::Error in the failure state must be handled before it is
+    // destroyed; only the Offload error below is reported.
+    consumeError(std::move(Err));
+    return {OL_ERRC_INVALID_EVENT, "The event failed to synchronize"};
+  }
+
+  return OL_SUCCESS;
+}
+
+// Adds one reference to Event.
+ol_impl_result_t olRetainEvent_impl(ol_event_handle_t Event) {
+  return olRetain<ol_event_handle_t>(Event);
+}
+
+// Drops one reference from Event; the event object is deleted with the last
+// one.
+ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event) {
+  return olRelease<ol_event_handle_t>(Event);
+}
+
+// Creates an event on Queue's device and records it against the queue's
+// current async info. The returned event carries one reference, owned by the
+// caller. Returns nullptr if the plugin fails to create or record the event.
+ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
+  auto *DeviceImpl = Queue->Device->Device;
+
+  auto EventImpl = std::make_unique<ol_event_impl_t>();
+  EventImpl->Queue = Queue;
+  // Nothing else sets the count, and olRelease() only deletes an object whose
+  // count drops to zero, so the event has to start with one reference.
+  EventImpl->RefCount = 1;
+
+  if (auto Err = DeviceImpl->createEvent(&EventImpl->EventInfo)) {
+    // An llvm::Error in the failure state must be handled before it is
+    // destroyed.
+    consumeError(std::move(Err));
+    return nullptr;
+  }
+
+  // NOTE(review): when recording fails, the plugin event created above is not
+  // destroyed here.
+  if (auto Err =
+          DeviceImpl->recordEvent(EventImpl->EventInfo, Queue->AsyncInfo)) {
+    consumeError(std::move(Err));
+    return nullptr;
+  }
+
+  return EventImpl.release();
+}
+
+// Enqueues a copy of Size bytes from SrcPtr to DstPtr on Queue. The plugin
+// operation is chosen by which side is the host device: retrieve (device to
+// host), submit (host to device) or exchange (device to device).
+// Returns OL_ERRC_INVALID_ARGUMENT when both sides are the host device and
+// OL_ERRC_UNKNOWN when the plugin operation fails. If EventOut is non-null it
+// receives the result of makeEvent(), which is nullptr when no event could be
+// created.
+ol_impl_result_t olEnqueueMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
+                                      ol_device_handle_t DstDevice,
+                                      void *SrcPtr,
+                                      ol_device_handle_t SrcDevice, size_t Size,
+                                      ol_event_handle_t *EventOut) {
+  const bool DstIsHost = DstDevice == HostDevice();
+  const bool SrcIsHost = SrcDevice == HostDevice();
+
+  if (DstIsHost && SrcIsHost) {
+    // TODO: We could actually handle this with a plain memcpy but we currently
+    // have no way of synchronizing this with the queue
+    return {OL_ERRC_INVALID_ARGUMENT,
+            "One of DstDevice and SrcDevice must be a non-host device"};
+  }
+
+  // In every branch a failed llvm::Error is consumed before returning, since
+  // it must be handled before it is destroyed.
+  if (DstIsHost) {
+    if (auto Err = SrcDevice->Device->dataRetrieve(DstPtr, SrcPtr, Size,
+                                                   Queue->AsyncInfo)) {
+      consumeError(std::move(Err));
+      return {OL_ERRC_UNKNOWN, "The data retrieve operation failed"};
+    }
+  } else if (SrcIsHost) {
+    if (auto Err = DstDevice->Device->dataSubmit(DstPtr, SrcPtr, Size,
+                                                 Queue->AsyncInfo)) {
+      consumeError(std::move(Err));
+      return {OL_ERRC_UNKNOWN, "The data submit operation failed"};
+    }
+  } else {
+    if (auto Err = SrcDevice->Device->dataExchange(
+            SrcPtr, *DstDevice->Device, DstPtr, Size, Queue->AsyncInfo)) {
+      consumeError(std::move(Err));
+      return {OL_ERRC_UNKNOWN, "The data exchange operation failed"};
+    }
+  }
+
+  if (EventOut)
+    *EventOut = makeEvent(Queue);
+
+  return OL_SUCCESS;
+}
+
+// Creates a program from the ProgDataSize bytes at ProgData and loads it onto
+// Device. On success *Program receives a handle with a reference count of one.
+// Returns OL_ERRC_INVALID_VALUE if the plugin cannot load the binary; nothing
+// is written to *Program in that case.
+ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device,
+                                      const void *ProgData, size_t ProgDataSize,
+                                      ol_program_handle_t *Program) {
+  // Keep the program in a unique_ptr until it is handed to the caller so an
+  // early return frees it together with the copied binary.
+  auto Prog = std::make_unique<ol_program_impl_t>();
+
+  // Make a copy of the program binary in case it is released by the caller.
+  Prog->ImageData = MemoryBuffer::getMemBufferCopy(
+      StringRef(reinterpret_cast<const char *>(ProgData), ProgDataSize));
+
+  // The image descriptor points into the copy owned by the program.
+  char *ImageStart = const_cast<char *>(Prog->ImageData->getBuffer().data());
+  Prog->DeviceImage = __tgt_device_image{ImageStart, ImageStart + ProgDataSize,
+                                         nullptr, nullptr};
+
+  auto Res =
+      Device->Device->loadBinary(Device->Device->Plugin, &Prog->DeviceImage);
+  if (!Res) {
+    // A failed llvm::Expected has to have its error taken and handled before
+    // it is destroyed.
+    consumeError(Res.takeError());
+    return OL_ERRC_INVALID_VALUE;
+  }
+
+  Prog->Image = *Res;
+  Prog->RefCount = 1;
+  *Program = Prog.release();
+
+  return OL_SUCCESS;
+}
+
+// Adds one reference to Program.
+ol_impl_result_t olRetainProgram_impl(ol_program_handle_t Program) {
+  return olRetain<ol_program_handle_t>(Program);
+}
+
+// Drops one reference from Program; the program object is deleted with the
+// last one.
+ol_impl_result_t olReleaseProgram_impl(ol_program_handle_t Program) {
+  return olRelease<ol_program_handle_t>(Program);
+}
+
+// Looks up KernelName in Program's image, initializes the plugin kernel and
+// stores a handle with a reference count of one in *Kernel.
+// Returns OL_ERRC_INVALID_KERNEL_NAME if the plugin cannot construct the
+// kernel and OL_ERRC_UNKNOWN if its initialization fails.
+ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
+                                     const char *KernelName,
+                                     ol_kernel_handle_t *Kernel) {
+  auto &Device = Program->Image->getDevice();
+
+  auto KernelImpl = Device.constructKernel(KernelName);
+  if (!KernelImpl) {
+    // A failed llvm::Expected has to have its error taken and handled before
+    // it is destroyed.
+    consumeError(KernelImpl.takeError());
+    return OL_ERRC_INVALID_KERNEL_NAME;
+  }
+
+  if (auto Err = KernelImpl->init(Device, *Program->Image)) {
+    // The same holds for an llvm::Error in the failure state.
+    consumeError(std::move(Err));
+    return {OL_ERRC_UNKNOWN, "Could not initialize the kernel"};
+  }
+
+  ol_kernel_handle_t CreatedKernel = new ol_kernel_impl_t();
+  CreatedKernel->RefCount = 1;
+  CreatedKernel->KernelImpl = &*KernelImpl;
+  *Kernel = CreatedKernel;
+
+  return OL_SUCCESS;
+}
+
+// Adds one reference to Kernel.
+ol_impl_result_t olRetainKernel_impl(ol_kernel_handle_t Kernel) {
+  return olRetain<ol_kernel_handle_t>(Kernel);
+}
+
+// Drops one reference from Kernel; the kernel handle object is deleted with
+// the last one.
+ol_impl_result_t olReleaseKernel_impl(ol_kernel_handle_t Kernel) {
+  return olRelease<ol_kernel_handle_t>(Kernel);
+}
+
+// Launches Kernel on Queue's device. LaunchSizeArgs supplies the number of
+// groups and the group size in each of three dimensions; ArgumentsData and
+// ArgumentsSize describe the argument buffer, which is passed on as-is.
+// Returns OL_ERRC_UNKNOWN if an error remains after the async info wrapper has
+// been finalized. If EventOut is non-null it receives the result of
+// makeEvent(), which is nullptr when no event could be created.
+ol_impl_result_t
+olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+                           const void *ArgumentsData, size_t ArgumentsSize,
+                           const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+                           ol_event_handle_t *EventOut) {
+  auto *DeviceImpl = Queue->Device->Device;
+
+  AsyncInfoWrapperTy AsyncInfoWrapper(*DeviceImpl, Queue->AsyncInfo);
+
+  KernelArgsTy LaunchArgs{};
+  LaunchArgs.NumTeams[0] = LaunchSizeArgs->NumGroupsX;
+  LaunchArgs.NumTeams[1] = LaunchSizeArgs->NumGroupsY;
+  LaunchArgs.NumTeams[2] = LaunchSizeArgs->NumGroupsZ;
+  LaunchArgs.ThreadLimit[0] = LaunchSizeArgs->GroupSizeX;
+  LaunchArgs.ThreadLimit[1] = LaunchSizeArgs->GroupSizeY;
+  LaunchArgs.ThreadLimit[2] = LaunchSizeArgs->GroupSizeZ;
+
+  KernelLaunchParamsTy Params;
+  Params.Data = const_cast<void *>(ArgumentsData);
+  Params.Size = ArgumentsSize;
+  LaunchArgs.ArgPtrs = reinterpret_cast<void **>(&Params);
+  // Don't do anything with pointer indirection; use arg data as-is
+  LaunchArgs.Flags.IsCUDA = true;
+
+  auto Err = Kernel->KernelImpl->launch(*DeviceImpl, LaunchArgs.ArgPtrs,
+                                        nullptr, LaunchArgs, AsyncInfoWrapper);
+
+  // finalize() is handed the launch result, so an error seen below may come
+  // from the launch itself.
+  AsyncInfoWrapper.finalize(Err);
+  if (Err) {
+    // An llvm::Error in the failure state must be handled before it is
+    // destroyed; only the Offload error below is reported.
+    consumeError(std::move(Err));
+    return {OL_ERRC_UNKNOWN, "Could not finalize the AsyncInfoWrapper"};
+  }
+
+  if (EventOut)
+    *EventOut = makeEvent(Queue);
+
+  return OL_SUCCESS;
+}
+
+} // namespace offload
+} // namespace llvm
diff --git a/offload/liboffload/src/OffloadLib.cpp b/offload/liboffload/src/OffloadLib.cpp
index 70e1ce1f84d83..8662d3a44124b 100644
--- a/offload/liboffload/src/OffloadLib.cpp
+++ b/offload/liboffload/src/OffloadLib.cpp
@@ -11,11 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "OffloadImpl.hpp"
+#include "llvm/Support/raw_ostream.h"
 #include <OffloadAPI.h>
 #include <OffloadPrint.hpp>
 
-#include <iostream>
-
 llvm::StringSet<> &errorStrs() {
   static llvm::StringSet<> ErrorStrs;
   return ErrorStrs;
@@ -36,9 +35,13 @@ OffloadConfig &offloadConfig() {
   return Config;
 }
 
+namespace llvm {
+namespace offload {
 // Pull in the declarations for the implementation functions. The actual entry
 // points in this file wrap these.
 #include "OffloadImplFuncDecls.inc"
+} // namespace offload
+} // namespace llvm
 
 // Pull in the tablegen'd entry point definitions.
 #include "OffloadEntryPoints.inc"
diff --git a/offload/tools/offload-tblgen/APIGen.cpp b/offload/tools/offload-tblgen/APIGen.cpp
index 97a2464f7a75c..8cc5bd5e452fe 100644
--- a/offload/tools/offload-tblgen/APIGen.cpp
+++ b/offload/tools/offload-tblgen/APIGen.cpp
@@ -41,9 +41,16 @@ static std::string MakeComment(StringRef in) {
 }
 
 static void ProcessHandle(const HandleRec &H, raw_ostream &OS) {
+  // Handle names must follow the '<prefix>_handle_t' pattern so that the name
+  // of the struct behind the handle, '<prefix>_impl_t', can be derived.
+  const StringRef HandleSuffix = "_handle_t";
+  if (!H.getName().ends_with(HandleSuffix)) {
+    errs() << "Handle type name (" << H.getName()
+           << ") must end with '_handle_t'!\n";
+    exit(1);
+  }
+
+  // operator+ on a StringRef yields a Twine, which must not be kept in a
+  // local variable; materialize the name as a std::string instead.
+  const std::string ImplName =
+      (H.getName().drop_back(HandleSuffix.size()) + "_impl_t").str();
   OS << CommentsHeader;
   OS << formatv("/// @brief {0}\n", H.getDesc());
-  OS << formatv("typedef struct {0}_ *{0};\n", H.getName());
+  OS << formatv("typedef struct {0} *{1};\n", ImplName, H.getName());
 }
 
 static void ProcessTypedef(const TypedefRec &T, raw_ostream &OS) {
diff --git a/offload/tools/offload-tblgen/EntryPointGen.cpp b/offload/tools/offload-tblgen/EntryPointGen.cpp
index 990ff96a3121d..66b9665292e15 100644
--- a/offload/tools/offload-tblgen/EntryPointGen.cpp
+++ b/offload/tools/offload-tblgen/EntryPointGen.cpp
@@ -35,7 +35,7 @@ static void EmitValidationFunc(const FunctionRec &F, raw_ostream &OS) {
   }
   OS << ") {\n";
 
-  OS << TAB_1 "if (true /*enableParameterValidation*/) {\n";
+  OS << TAB_1 "if (offloadConfig().ValidationEnabled) {\n";
   // Emit validation checks
   for (const auto &Return : F.getReturns()) {
     for (auto &Condition : Return.getConditions()) {
@@ -51,7 +51,8 @@ static void EmitValidationFunc(const FunctionRec &F, raw_ostream &OS) {
 
   // Perform actual function call to the implementation
   ParamNameList = ParamNameList.substr(0, ParamNameList.size() - 2);
-  OS << formatv(TAB_1 "return {0}_impl({1});\n\n", F.getName(), ParamNameList);
+  OS << formatv(TAB_1 "return llvm::offload::{0}_impl({1});\n\n", F.getName(),
+                ParamNameList);
   OS << "}\n";
 }
 
@@ -72,7 +73,7 @@ static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) {
 
   // Emit pre-call prints
   OS << TAB_1 "if (offloadConfig().TracingEnabled) {\n";
-  OS << formatv(TAB_2 "std::cout << \"---> {0}\";\n", F.getName());
+  OS << formatv(TAB_2 "llvm::errs() << \"---> {0}\";\n", F.getName());
   OS << TAB_1 "}\n\n";
 
   // Perform actual function call to the validation wrapper
@@ -91,13 +92,13 @@ static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) {
       }
     }
     OS << formatv("};\n");
-    OS << TAB_2 "std::cout << \"(\" << &Params << \")\";\n";
+    OS << TAB_2 "llvm::errs() << \"(\" << &Params << \")\";\n";
   } else {
-    OS << TAB_2 "std::cout << \"()\";\n";
+    OS << TAB_2 "llvm::errs() << \"()\";\n";
   }
-  OS << TAB_2 "std::cout << \"-> \" << Result << \"\\n\";\n";
+  OS << TAB_2 "llvm::errs() << \"-> \" << Result << \"\\n\";\n";
   OS << TAB_2 "if (Result && Result->Details) {\n";
-  OS << TAB_3 "std::cout << \"     *Error Details* \" << Result->Details "
+  OS << TAB_3 "llvm::errs() << \"     *Error Details* \" << Result->Details "
               "<< \" \\n\";\n";
   OS << TAB_2 "}\n";
   OS << TAB_1 "}\n";
@@ -121,7 +122,7 @@ static void EmitCodeLocWrapper(const FunctionRec &F, raw_ostream &OS) {
   OS << "ol_code_location_t *CodeLocation";
   OS << ") {\n";
   OS << TAB_1 "currentCodeLocation() = CodeLocation;\n";
-  OS << formatv(TAB_1 "{0}_result_t Result = {1}({2});\n\n", PrefixLower,
+  OS << formatv(TAB_1 "{0}_result_t Result = ::{1}({2});\n\n", PrefixLower,
                 F.getName(), ParamNameList);
   OS << TAB_1 "currentCodeLocation() = nullptr;\n";
   OS << TAB_1 "return Result;\n";
diff --git a/offload/tools/offload-tblgen/PrintGen.cpp b/offload/tools/offload-tblgen/PrintGen.cpp
index 2a7c63c3dfd1f..ca1b5e3e2bead 100644
--- a/offload/tools/offload-tblgen/PrintGen.cpp
+++ b/offload/tools/offload-tblgen/PrintGen.cpp
@@ -20,24 +20,24 @@
 using namespace llvm;
 using namespace offload::tblgen;
 
-constexpr auto PrintEnumHeader =
+constexpr auto PrintTypeHeader =
     R"(///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the {0} type
-/// @returns std::ostream &
+/// @returns llvm::raw_ostream &
 )";
 
 constexpr auto PrintTaggedEnumHeader =
     R"(///////////////////////////////////////////////////////////////////////////////
 /// @brief Print type-tagged {0} enum value
-/// @returns std::ostream &
+/// @returns llvm::raw_ostream &
 )";
 
 static void ProcessEnum(const EnumRec &Enum, raw_ostream &OS) {
-  OS << formatv(PrintEnumHeader, Enum.getName());
-  OS << formatv(
-      "inline std::ostream &operator<<(std::ostream &os, enum {0} value) "
-      "{{\n" TAB_1 "switch (value) {{\n",
-      Enum.getName());
+  OS << formatv(PrintTypeHeader, Enum.getName());
+  OS << formatv("inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os, "
+                "enum {0} value) "
+                "{{\n" TAB_1 "switch (value) {{\n",
+                Enum.getName());
 
   for (const auto &Val : Enum.getValues()) {
     auto Name = Enum.getEnumValNamePrefix() + "_" + Val.getName();
@@ -56,7 +56,7 @@ static void ProcessEnum(const EnumRec &Enum, raw_ostream &OS) {
   OS << formatv(PrintTaggedEnumHeader, Enum.getName());
 
   OS << formatv(R"""(template <>
-inline void printTagged(std::ostream &os, const void *ptr, {0} value, size_t size) {{
+inline void printTagged(llvm::raw_ostream &os, const void *ptr, {0} value, size_t size) {{
   if (ptr == NULL) {{
     printPtr(os, ptr);
     return;
@@ -96,7 +96,7 @@ inline void printTagged(std::ostream &os, const void *ptr, {0} value, size_t siz
 
 static void EmitResultPrint(raw_ostream &OS) {
   OS << R""(
-inline std::ostream &operator<<(std::ostream &os,
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
                                 const ol_error_struct_t *Err) {
   if (Err == nullptr) {
     os << "OL_SUCCESS";
@@ -115,7 +115,7 @@ static void EmitFunctionParamStructPrint(const FunctionRec &Func,
   }
 
   OS << formatv(R"(
-inline std::ostream &operator<<(std::ostream &os, const struct {0} *params) {{
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const struct {0} *params) {{
 )",
                 Func.getParamStructName());
 
@@ -150,6 +150,32 @@ inline std::ostream &operator<<(std::ostream &os, const struct {0} *params) {{
   OS << TAB_1 "return os;\n}\n";
 }
 
+// Emits an operator<< for the given struct that prints it as
+// "(struct <name>){.member = value, ...}". Pointer and handle members are
+// printed through printPtr(); every other member is streamed directly.
+void ProcessStruct(const StructRec &Struct, raw_ostream &OS) {
+  const auto StructName = Struct.getName();
+
+  // ol_error_struct_t is skipped; EmitResultPrint() emits an operator<< for
+  // it.
+  if (StructName == "ol_error_struct_t")
+    return;
+
+  OS << formatv(PrintTypeHeader, StructName);
+  OS << formatv(R"(
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const struct {0} params) {{
+)",
+                StructName);
+  OS << formatv(TAB_1 "os << \"(struct {0}){{\";\n", StructName);
+
+  const auto &Members = Struct.getMembers();
+  for (const auto &Member : Members) {
+    const StringRef MemberName = Member.getName();
+    const bool UsePrintPtr = Member.isPointerType() || Member.isHandleType();
+
+    OS << formatv(TAB_1 "os << \".{0} = \";\n", MemberName);
+    if (UsePrintPtr)
+      OS << formatv(TAB_1 "printPtr(os, params.{0});\n", MemberName);
+    else
+      OS << formatv(TAB_1 "os << params.{0};\n", MemberName);
+
+    // A separator follows every member whose name differs from the last
+    // member's name.
+    const bool IsLast = MemberName == Members.back().getName();
+    if (!IsLast)
+      OS << TAB_1 "os << \", \";\n";
+  }
+
+  OS << TAB_1 "os << \"}\";\n";
+  OS << TAB_1 "return os;\n";
+  OS << "}\n";
+}
+
 void EmitOffloadPrintHeader(const RecordKeeper &Records, raw_ostream &OS) {
   OS << GenericHeader;
   OS << R"""(
@@ -158,11 +184,11 @@ void EmitOffloadPrintHeader(const RecordKeeper &Records, raw_ostream &OS) {
 #pragma once
 
 #include <OffloadAPI.h>
-#include <ostream>
+#include <llvm/Support/raw_ostream.h>
 
 
-template <typename T> inline ol_result_t printPtr(std::ostream &os, const T *ptr);
-template <typename T> inline void printTagged(std::ostream &os, const void *ptr, T value, size_t size);
+template <typename T> inline ol_result_t printPtr(llvm::raw_ostream &os, const T *ptr);
+template <typename T> inline void printTagged(llvm::raw_ostream &os, const void *ptr, T value, size_t size);
 )""";
 
   // ==========
@@ -180,9 +206,9 @@ template <typename T> inline void printTagged(std::ostream &os, const void *ptr,
   // use each other.
   OS << "\n";
   for (auto *R : Records.getAllDerivedDefinitions("Enum")) {
-    OS << formatv(
-        "inline std::ostream &operator<<(std::ostream &os, enum {0} value);\n",
-        EnumRec{R}.getName());
+    OS << formatv("inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os, "
+                  "enum {0} value);\n",
+                  EnumRec{R}.getName());
   }
   OS << "\n";
 
@@ -193,6 +219,11 @@ template <typename T> inline void printTagged(std::ostream &os, const void *ptr,
   }
   EmitResultPrint(OS);
 
+  for (auto *R : Records.getAllDerivedDefinitions("Struct")) {
+    StructRec S{R};
+    ProcessStruct(S, OS);
+  }
+
   // Emit print functions for the function param structs
   for (auto *R : Records.getAllDerivedDefinitions("Function")) {
     EmitFunctionParamStructPrint(FunctionRec{R}, OS);
@@ -201,7 +232,7 @@ template <typename T> inline void printTagged(std::ostream &os, const void *ptr,
   OS << R"""(
 ///////////////////////////////////////////////////////////////////////////////
 // @brief Print pointer value
-template <typename T> inline ol_result_t printPtr(std::ostream &os, const T *ptr) {
+template <typename T> inline ol_result_t printPtr(llvm::raw_ostream &os, const T *ptr) {
     if (ptr == nullptr) {
         os << "nullptr";
     } else if constexpr (std::is_pointer_v<T>) {
diff --git a/offload/tools/offload-tblgen/RecordTypes.hpp b/offload/tools/offload-tblgen/RecordTypes.hpp
index 0bf3256c525d9..9faf361f4dd76 100644
--- a/offload/tools/offload-tblgen/RecordTypes.hpp
+++ b/offload/tools/offload-tblgen/RecordTypes.hpp
@@ -103,6 +103,8 @@ class StructMemberRec {
   StringRef getType() const { return rec->getValueAsString("type"); }
   StringRef getName() const { return rec->getValueAsString("name"); }
   StringRef getDesc() const { return rec->getValueAsString("desc"); }
+  bool isPointerType() const { return getType().ends_with('*'); } // True when the "type" string ends in '*'.
+  bool isHandleType() const { return getType().ends_with("_handle_t"); } // True when the "type" string ends in "_handle_t".
 
 private:
   const Record *rec;
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index 033ee2b6ec746..a6fee7fe0d29d 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -1,6 +1,9 @@
 set(PLUGINS_TEST_COMMON LLVMOffload)
 set(PLUGINS_TEST_INCLUDE ${LIBOMPTARGET_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/common)
 
+add_subdirectory(device_code)
+message(${OFFLOAD_TEST_DEVICE_CODE_PATH})
+
 add_libompt_unittest("offload.unittests"
     ${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatform.cpp
@@ -10,7 +13,23 @@ add_libompt_unittest("offload.unittests"
     ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDevice.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceCount.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfo.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfoSize.cpp)
-add_dependencies("offload.unittests" ${PLUGINS_TEST_COMMON})
+    ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfoSize.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/queue/olCreateQueue.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/queue/olWaitQueue.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/queue/olReleaseQueue.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/queue/olRetainQueue.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemAlloc.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemFree.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueMemcpy.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueKernelLaunch.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/program/olCreateProgram.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/program/olRetainProgram.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/program/olReleaseProgram.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olCreateKernel.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olReleaseKernel.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olRetainKernel.cpp
+    )
+add_dependencies("offload.unittests" ${PLUGINS_TEST_COMMON} LibomptUnitTestsDeviceBins)
+target_compile_definitions("offload.unittests" PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
 target_link_libraries("offload.unittests" PRIVATE ${PLUGINS_TEST_COMMON})
 target_include_directories("offload.unittests" PRIVATE ${PLUGINS_TEST_INCLUDE})
diff --git a/offload/unittests/OffloadAPI/common/Environment.cpp b/offload/unittests/OffloadAPI/common/Environment.cpp
index f07a66cda2189..1eb0247e1b494 100644
--- a/offload/unittests/OffloadAPI/common/Environment.cpp
+++ b/offload/unittests/OffloadAPI/common/Environment.cpp
@@ -9,7 +9,9 @@
 #include "Environment.hpp"
 #include "Fixtures.hpp"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include <OffloadAPI.h>
+#include <fstream>
 
 using namespace llvm;
 
@@ -94,3 +96,37 @@ ol_platform_handle_t TestEnvironment::getPlatform() {
 
   return Platform;
 }
+
+// TODO: Allow overriding via cmd line arg
+const std::string DeviceBinsDirectory = DEVICE_CODE_PATH;
+
+bool TestEnvironment::loadDeviceBinary(
+    const std::string &BinaryName, ol_platform_handle_t Platform,
+    std::unique_ptr<MemoryBuffer> &BinaryOut) {
+  // Reads DeviceBinsDirectory/<BinaryName><backend extension> into BinaryOut.
+  // Returns false (after printing a diagnostic) on any failure.
+  ol_platform_backend_t Backend = OL_PLATFORM_BACKEND_UNKNOWN;
+  olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND, sizeof(Backend),
+                    &Backend);
+  std::string FileExtension;
+  if (Backend == OL_PLATFORM_BACKEND_AMDGPU) {
+    FileExtension = ".amdgpu.bin";
+  } else if (Backend == OL_PLATFORM_BACKEND_CUDA) {
+    FileExtension = ".nvptx64.bin";
+  } else {
+    errs() << "Unsupported platform type for a device binary test.\n";
+    return false;
+  }
+
+  std::string SourcePath =
+      DeviceBinsDirectory + "/" + BinaryName + FileExtension;
+
+  auto SourceFile = MemoryBuffer::getFile(SourcePath, false, false);
+  if (!SourceFile) {
+    errs() << "failed to read device binary file: " << SourcePath << "\n";
+    return false;
+  }
+
+  BinaryOut = std::move(SourceFile.get());
+  return true;
+}
diff --git a/offload/unittests/OffloadAPI/common/Environment.hpp b/offload/unittests/OffloadAPI/common/Environment.hpp
index 6dba2381eb0b7..dc5d4fce7d581 100644
--- a/offload/unittests/OffloadAPI/common/Environment.hpp
+++ b/offload/unittests/OffloadAPI/common/Environment.hpp
@@ -8,10 +8,14 @@
 
 #pragma once
 
+#include "llvm/Support/MemoryBuffer.h"
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
 namespace TestEnvironment {
 const std::vector<ol_platform_handle_t> &getPlatforms();
 ol_platform_handle_t getPlatform();
+bool loadDeviceBinary(const std::string &BinaryName,
+                      ol_platform_handle_t Platform,
+                      std::unique_ptr<llvm::MemoryBuffer> &BinaryOut);
 } // namespace TestEnvironment
diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp
index 410a435dee1b5..c08ac716bff42 100644
--- a/offload/unittests/OffloadAPI/common/Fixtures.hpp
+++ b/offload/unittests/OffloadAPI/common/Fixtures.hpp
@@ -27,6 +27,14 @@
   } while (0)
 #endif
 
+#ifndef ASSERT_ANY_ERROR
+#define ASSERT_ANY_ERROR(ACTUAL)                                               \
+  do {                                                                         \
+    ol_result_t Res = ACTUAL;                                                  \
+    ASSERT_TRUE(Res);                                                          \
+  } while (0)
+#endif
+
 #define RETURN_ON_FATAL_FAILURE(...)                                           \
   __VA_ARGS__;                                                                 \
   if (this->HasFatalFailure() || this->IsSkipped()) {                          \
@@ -34,13 +42,13 @@
   }                                                                            \
   (void)0
 
-struct offloadTest : ::testing::Test {
+struct OffloadTest : ::testing::Test {
   // No special behavior now, but just in case we need to override it in future
 };
 
-struct offloadPlatformTest : offloadTest {
+struct OffloadPlatformTest : OffloadTest {
   void SetUp() override {
-    RETURN_ON_FATAL_FAILURE(offloadTest::SetUp());
+    RETURN_ON_FATAL_FAILURE(OffloadTest::SetUp());
 
     Platform = TestEnvironment::getPlatform();
     ASSERT_NE(Platform, nullptr);
@@ -49,9 +57,9 @@ struct offloadPlatformTest : offloadTest {
   ol_platform_handle_t Platform;
 };
 
-struct offloadDeviceTest : offloadPlatformTest {
+struct OffloadDeviceTest : OffloadPlatformTest {
   void SetUp() override {
-    RETURN_ON_FATAL_FAILURE(offloadPlatformTest::SetUp());
+    RETURN_ON_FATAL_FAILURE(OffloadPlatformTest::SetUp());
 
     uint32_t NumDevices;
     ASSERT_SUCCESS(olGetDeviceCount(Platform, &NumDevices));
@@ -60,5 +68,59 @@ struct offloadDeviceTest : offloadPlatformTest {
     ASSERT_SUCCESS(olGetDevice(Platform, 1, &Device));
   }
 
-  ol_device_handle_t Device;
+  ol_device_handle_t Device = nullptr;
+};
+
+// Fixture for a generic program test. If you want a different program, use
+// OffloadQueueTest and create your own program handle with the binary you want.
+struct OffloadProgramTest : OffloadDeviceTest {
+  void SetUp() override {
+    RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::SetUp());
+    ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Platform, DeviceBin));
+    ASSERT_GT(DeviceBin->getBufferSize(), 0lu); // binary must not be empty
+    ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
+                                   DeviceBin->getBufferSize(), &Program));
+  }
+
+  void TearDown() override {
+    if (Program) {
+      olReleaseProgram(Program);
+    }
+    RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::TearDown());
+  }
+
+  ol_program_handle_t Program = nullptr;
+  std::unique_ptr<llvm::MemoryBuffer> DeviceBin;
+};
+
+struct OffloadKernelTest : OffloadProgramTest {
+  void SetUp() override {
+    RETURN_ON_FATAL_FAILURE(OffloadProgramTest::SetUp());
+    ASSERT_SUCCESS(olCreateKernel(Program, "foo", &Kernel));
+  }
+
+  void TearDown() override {
+    if (Kernel) {
+      olReleaseKernel(Kernel);
+    }
+    RETURN_ON_FATAL_FAILURE(OffloadProgramTest::TearDown());
+  }
+
+  ol_kernel_handle_t Kernel = nullptr;
+};
+
+struct OffloadQueueTest : OffloadDeviceTest {
+  void SetUp() override {
+    RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::SetUp());
+    ASSERT_SUCCESS(olCreateQueue(Device, &Queue));
+  }
+
+  void TearDown() override {
+    if (Queue) {
+      olReleaseQueue(Queue);
+    }
+    RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::TearDown());
+  }
+
+  ol_queue_handle_t Queue = nullptr;
 };
diff --git a/offload/unittests/OffloadAPI/device/olGetDevice.cpp b/offload/unittests/OffloadAPI/device/olGetDevice.cpp
index 68d4682dd3351..c3ec88f8036ae 100644
--- a/offload/unittests/OffloadAPI/device/olGetDevice.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDevice.cpp
@@ -10,7 +10,7 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-using olGetDeviceTest = offloadPlatformTest;
+using olGetDeviceTest = OffloadPlatformTest;
 
 TEST_F(olGetDeviceTest, Success) {
   uint32_t Count = 0;
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp
index ef377d671bf60..db813cb774cd4 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp
@@ -10,7 +10,7 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-using olGetDeviceCountTest = offloadPlatformTest;
+using olGetDeviceCountTest = OffloadPlatformTest;
 
 TEST_F(olGetDeviceCountTest, Success) {
   uint32_t Count = 0;
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
index c936802fb1e4d..a4bfc0abb2440 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
@@ -11,10 +11,10 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-struct olGetDeviceInfoTest : offloadDeviceTest,
+struct olGetDeviceInfoTest : OffloadDeviceTest,
                              ::testing::WithParamInterface<ol_device_info_t> {
 
-  void SetUp() override { RETURN_ON_FATAL_FAILURE(offloadDeviceTest::SetUp()); }
+  void SetUp() override { RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::SetUp()); }
 };
 
 INSTANTIATE_TEST_SUITE_P(
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
index 9e792d1c3e25e..b4b5042dbfd87 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
@@ -12,10 +12,10 @@
 #include "olDeviceInfo.hpp"
 
 struct olGetDeviceInfoSizeTest
-    : offloadDeviceTest,
+    : OffloadDeviceTest,
       ::testing::WithParamInterface<ol_device_info_t> {
 
-  void SetUp() override { RETURN_ON_FATAL_FAILURE(offloadDeviceTest::SetUp()); }
+  void SetUp() override { RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::SetUp()); }
 };
 
 // TODO: We could autogenerate the list of enum values
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
new file mode 100644
index 0000000000000..a6a5edcee48fb
--- /dev/null
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -0,0 +1,68 @@
+macro(add_offload_test_device_code test_filename test_name)
+    message("Building Offload API device code for test '${test_name}'")
+    set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
+
+    # Build for NVPTX
+    if(OFFLOAD_TEST_TARGET_NVIDIA)
+        set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin)
+        add_custom_command(OUTPUT ${BIN_PATH}
+            COMMAND
+            ${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
+            -march=${LIBOMPTARGET_DEP_CUDA_ARCH}
+            --cuda-path=${CUDA_ROOT}
+            ${SRC_PATH} -o ${BIN_PATH}
+            DEPENDS ${SRC_PATH}
+        )
+        list(APPEND BIN_PATHS ${BIN_PATH})
+    endif()
+
+    # Build for AMDGPU
+    if(OFFLOAD_TEST_TARGET_AMDGPU)
+        set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin)
+        add_custom_command(OUTPUT ${BIN_PATH}
+            COMMAND
+            ${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
+            -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
+            ${SRC_PATH} -o ${BIN_PATH}
+            DEPENDS ${SRC_PATH}
+        )
+        list(APPEND BIN_PATHS ${BIN_PATH})
+    endif()
+
+    # TODO: Build for host CPU
+endmacro()
+
+
+# Decide what device targets to build for. LibomptargetGetDependencies is
+# included at the top-level so the GPUs present on the system are already
+# detected.
+set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
+    "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
+set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
+    "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
+
+find_package(CUDAToolkit QUIET)
+if(CUDAToolkit_FOUND)
+  get_filename_component(CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
+endif()
+if (OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
+    set(LIBOMPTARGET_DEP_CUDA_ARCH ${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH})
+    set(OFFLOAD_TEST_TARGET_NVIDIA ON)
+elseif (LIBOMPTARGET_FOUND_NVIDIA_GPU AND CUDA_ROOT AND "cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+    set(OFFLOAD_TEST_TARGET_NVIDIA ON)
+endif()
+
+if (OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
+    set(LIBOMPTARGET_DEP_AMDGPU_ARCH ${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH})
+    set(OFFLOAD_TEST_TARGET_AMDGPU ON)
+elseif (LIBOMPTARGET_FOUND_AMDGPU_GPU AND "amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+    list(GET LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST 0 LIBOMPTARGET_DEP_AMDGPU_ARCH)
+    set(OFFLOAD_TEST_TARGET_AMDGPU ON)
+endif()
+
+add_offload_test_device_code(foo.c foo)
+add_offload_test_device_code(bar.c bar)
+
+add_custom_target(LibomptUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
+
+set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
diff --git a/offload/unittests/OffloadAPI/device_code/bar.c b/offload/unittests/OffloadAPI/device_code/bar.c
new file mode 100644
index 0000000000000..786aa2f5d61e7
--- /dev/null
+++ b/offload/unittests/OffloadAPI/device_code/bar.c
@@ -0,0 +1,5 @@
+#include <gpuintrin.h>
+
+__gpu_kernel void foo(int *out) {
+  out[__gpu_thread_id(0)] = __gpu_thread_id(0) + 1;
+}
diff --git a/offload/unittests/OffloadAPI/device_code/foo.c b/offload/unittests/OffloadAPI/device_code/foo.c
new file mode 100644
index 0000000000000..5bc893961d49f
--- /dev/null
+++ b/offload/unittests/OffloadAPI/device_code/foo.c
@@ -0,0 +1,5 @@
+#include <gpuintrin.h>
+
+__gpu_kernel void foo(int *out) {
+  out[__gpu_thread_id(0)] = __gpu_thread_id(0);
+}
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueKernelLaunch.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueKernelLaunch.cpp
new file mode 100644
index 0000000000000..d185cc5831f2b
--- /dev/null
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueKernelLaunch.cpp
@@ -0,0 +1,66 @@
+//===------- Offload API tests - olEnqueueKernelLaunch --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+struct olEnqueueKernelLaunchTest : OffloadQueueTest {
+  void SetUp() override {
+    RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
+    ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Platform, DeviceBin));
+    ASSERT_GT(DeviceBin->getBufferSize(), 0lu); // binary must not be empty
+    ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
+                                   DeviceBin->getBufferSize(), &Program));
+    ASSERT_SUCCESS(olCreateKernel(Program, "foo", &Kernel));
+  }
+
+  void TearDown() override {
+    if (Kernel) {
+      olReleaseKernel(Kernel);
+    }
+    if (Program) {
+      olReleaseProgram(Program);
+    }
+    RETURN_ON_FATAL_FAILURE(OffloadQueueTest::TearDown());
+  }
+
+  std::unique_ptr<llvm::MemoryBuffer> DeviceBin;
+  ol_program_handle_t Program = nullptr;
+  ol_kernel_handle_t Kernel = nullptr;
+};
+
+TEST_F(olEnqueueKernelLaunchTest, Success) {
+  void *Mem = nullptr;
+  // The kernel stores one int per work item, so size the buffer in ints.
+  ASSERT_SUCCESS(
+      olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 64 * sizeof(int), &Mem));
+  ol_kernel_launch_size_args_t LaunchArgs{};
+  LaunchArgs.Dimensions = 1;
+  LaunchArgs.GroupSizeX = 64;
+  LaunchArgs.GroupSizeY = 1;
+  LaunchArgs.GroupSizeZ = 1;
+  LaunchArgs.NumGroupsX = 1;
+  LaunchArgs.NumGroupsY = 1;
+  LaunchArgs.NumGroupsZ = 1;
+
+  struct {
+    void *Mem;
+  } Args{Mem};
+
+  ASSERT_SUCCESS(olEnqueueKernelLaunch(Queue, Kernel, &Args, sizeof(Args),
+                                       &LaunchArgs, nullptr));
+  ASSERT_SUCCESS(olWaitQueue(Queue));
+
+  int *Data = static_cast<int *>(Mem);
+  for (int i = 0; i < 64; i++) {
+    ASSERT_EQ(Data[i], i);
+  }
+
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_SHARED, Mem));
+}
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueMemcpy.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueMemcpy.cpp
new file mode 100644
index 0000000000000..c43af8b4ae81d
--- /dev/null
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueMemcpy.cpp
@@ -0,0 +1,71 @@
+//===------- Offload API tests - olEnqueueMemcpy --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olEnqueueMemcpyTest = OffloadQueueTest;
+
+TEST_F(olEnqueueMemcpyTest, SuccessHtoD) {
+  constexpr size_t Size = 1024;
+  void *Alloc = nullptr;
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
+  std::vector<uint8_t> Input(Size, 42);
+  ol_device_handle_t Host;
+  ASSERT_SUCCESS(olGetHostDevice(&Host));
+  ASSERT_SUCCESS(
+      olEnqueueMemcpy(Queue, Alloc, Device, Input.data(), Host, Size, nullptr));
+  ASSERT_SUCCESS(olWaitQueue(Queue)); // a failed copy must fail the test
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
+
+TEST_F(olEnqueueMemcpyTest, SuccessDtoH) {
+  constexpr size_t Size = 1024;
+  void *Alloc;
+  std::vector<uint8_t> Input(Size, 42);
+  std::vector<uint8_t> Output(Size, 0);
+  ol_device_handle_t Host;
+  ASSERT_SUCCESS(olGetHostDevice(&Host));
+
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
+  ASSERT_SUCCESS(
+      olEnqueueMemcpy(Queue, Alloc, Device, Input.data(), Host, Size, nullptr));
+  ASSERT_SUCCESS(olEnqueueMemcpy(Queue, Output.data(), Host, Alloc, Device,
+                                 Size, nullptr));
+  ASSERT_SUCCESS(olWaitQueue(Queue));
+  for (uint8_t Val : Output) {
+    ASSERT_EQ(Val, 42);
+  }
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
+
+TEST_F(olEnqueueMemcpyTest, SuccessDtoD) {
+  constexpr size_t Size = 1024;
+  void *AllocA;
+  void *AllocB;
+  std::vector<uint8_t> Input(Size, 42);
+  std::vector<uint8_t> Output(Size, 0);
+  ol_device_handle_t Host;
+  ASSERT_SUCCESS(olGetHostDevice(&Host));
+
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocA));
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocB));
+  ASSERT_SUCCESS(olEnqueueMemcpy(Queue, AllocA, Device, Input.data(), Host,
+                                 Size, nullptr));
+  ASSERT_SUCCESS(
+      olEnqueueMemcpy(Queue, AllocB, Device, AllocA, Device, Size, nullptr));
+  ASSERT_SUCCESS(olEnqueueMemcpy(Queue, Output.data(), Host, AllocB, Device,
+                                 Size, nullptr));
+  ASSERT_SUCCESS(olWaitQueue(Queue));
+  for (uint8_t Val : Output) {
+    ASSERT_EQ(Val, 42);
+  }
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, AllocA));
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, AllocB));
+}
diff --git a/offload/unittests/OffloadAPI/kernel/olCreateKernel.cpp b/offload/unittests/OffloadAPI/kernel/olCreateKernel.cpp
new file mode 100644
index 0000000000000..87879e6b1d375
--- /dev/null
+++ b/offload/unittests/OffloadAPI/kernel/olCreateKernel.cpp
@@ -0,0 +1,31 @@
+//===------- Offload API tests - olCreateKernel ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olCreateKernelTest = OffloadProgramTest;
+
+TEST_F(olCreateKernelTest, Success) {
+  ol_kernel_handle_t Kernel = nullptr;
+  ASSERT_SUCCESS(olCreateKernel(Program, "foo", &Kernel));
+  ASSERT_NE(Kernel, nullptr);
+  ASSERT_SUCCESS(olReleaseKernel(Kernel));
+}
+
+TEST_F(olCreateKernelTest, InvalidNullProgram) {
+  ol_kernel_handle_t Kernel = nullptr;
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
+               olCreateKernel(nullptr, "foo", &Kernel));
+}
+
+TEST_F(olCreateKernelTest, InvalidNullKernelPointer) {
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
+               olCreateKernel(Program, "foo", nullptr));
+}
diff --git a/offload/unittests/OffloadAPI/kernel/olReleaseKernel.cpp b/offload/unittests/OffloadAPI/kernel/olReleaseKernel.cpp
new file mode 100644
index 0000000000000..8229253bbbb0f
--- /dev/null
+++ b/offload/unittests/OffloadAPI/kernel/olReleaseKernel.cpp
@@ -0,0 +1,22 @@
+//===------- Offload API tests - olReleaseKernel --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olReleaseKernelTest = OffloadKernelTest;
+
+TEST_F(olReleaseKernelTest, Success) {
+  ASSERT_SUCCESS(olRetainKernel(Kernel));
+  ASSERT_SUCCESS(olReleaseKernel(Kernel));
+}
+
+TEST_F(olReleaseKernelTest, InvalidNullHandle) {
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olReleaseKernel(nullptr));
+}
diff --git a/offload/unittests/OffloadAPI/kernel/olRetainKernel.cpp b/offload/unittests/OffloadAPI/kernel/olRetainKernel.cpp
new file mode 100644
index 0000000000000..5bd4456fa4fbc
--- /dev/null
+++ b/offload/unittests/OffloadAPI/kernel/olRetainKernel.cpp
@@ -0,0 +1,19 @@
+//===------- Offload API tests - olRetainKernel ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olRetainKernelTest = OffloadKernelTest;
+
+TEST_F(olRetainKernelTest, Success) { ASSERT_SUCCESS(olRetainKernel(Kernel)); }
+
+TEST_F(olRetainKernelTest, InvalidNullHandle) {
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olRetainKernel(nullptr));
+}
diff --git a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
new file mode 100644
index 0000000000000..8912d8fcc1bf8
--- /dev/null
+++ b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
@@ -0,0 +1,45 @@
+//===------- Offload API tests - olMemAlloc -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olMemAllocTest = OffloadDeviceTest;
+
+TEST_F(olMemAllocTest, SuccessAllocShared) {
+  void *Alloc = nullptr;
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 1024, &Alloc));
+  ASSERT_NE(Alloc, nullptr);
+  olMemFree(Device, OL_ALLOC_TYPE_SHARED, Alloc);
+}
+
+TEST_F(olMemAllocTest, SuccessAllocHost) {
+  void *Alloc = nullptr;
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, &Alloc));
+  ASSERT_NE(Alloc, nullptr);
+  olMemFree(Device, OL_ALLOC_TYPE_HOST, Alloc);
+}
+
+TEST_F(olMemAllocTest, SuccessAllocDevice) {
+  void *Alloc = nullptr;
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
+  ASSERT_NE(Alloc, nullptr);
+  olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc);
+}
+
+TEST_F(olMemAllocTest, InvalidNullDevice) {
+  void *Alloc = nullptr;
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
+               olMemAlloc(nullptr, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
+}
+
+TEST_F(olMemAllocTest, InvalidNullOutPtr) {
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
+               olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, nullptr));
+}
diff --git a/offload/unittests/OffloadAPI/memory/olMemFree.cpp b/offload/unittests/OffloadAPI/memory/olMemFree.cpp
new file mode 100644
index 0000000000000..45e1b2a61eace
--- /dev/null
+++ b/offload/unittests/OffloadAPI/memory/olMemFree.cpp
@@ -0,0 +1,47 @@
+//===------- Offload API tests - olMemFree --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olMemFreeTest = OffloadDeviceTest;
+
+TEST_F(olMemFreeTest, SuccessFreeShared) {
+  void *Alloc = nullptr;
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 1024, &Alloc));
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_SHARED, Alloc));
+}
+
+TEST_F(olMemFreeTest, SuccessFreeHost) {
+  void *Alloc = nullptr;
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, &Alloc));
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_HOST, Alloc));
+}
+
+TEST_F(olMemFreeTest, SuccessFreeDevice) {
+  void *Alloc = nullptr;
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
+
+TEST_F(olMemFreeTest, InvalidNullDevice) {
+  void *Alloc = nullptr;
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
+               olMemFree(nullptr, OL_ALLOC_TYPE_DEVICE, Alloc));
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
+
+TEST_F(olMemFreeTest, InvalidNullPtr) {
+  void *Alloc = nullptr;
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
+               olMemFree(Device, OL_ALLOC_TYPE_DEVICE, nullptr));
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp
index 4a2f9e8ac7741..b663c623bf085 100644
--- a/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp
+++ b/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp
@@ -10,7 +10,7 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-using olGetPlatformTest = offloadTest;
+using olGetPlatformTest = OffloadTest;
 
 TEST_F(olGetPlatformTest, Success) {
   uint32_t PlatformCount;
diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp
index 15b4b6abcd70d..3ae00f553f97e 100644
--- a/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp
+++ b/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp
@@ -10,7 +10,7 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-using olGetPlatformCountTest = offloadTest;
+using olGetPlatformCountTest = OffloadTest;
 
 TEST_F(olGetPlatformCountTest, Success) {
   uint32_t PlatformCount;
diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp
index c646bdc50b7da..bd6ad3f84e776 100644
--- a/offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp
+++ b/offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp
@@ -12,7 +12,7 @@
 #include "olPlatformInfo.hpp"
 
 struct olGetPlatformInfoTest
-    : offloadPlatformTest,
+    : OffloadPlatformTest,
       ::testing::WithParamInterface<ol_platform_info_t> {};
 
 INSTANTIATE_TEST_SUITE_P(
diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp
index 7c9274082e8e4..5f6067e2e2591 100644
--- a/offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp
+++ b/offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp
@@ -12,7 +12,7 @@
 #include "olPlatformInfo.hpp"
 
 struct olGetPlatformInfoSizeTest
-    : offloadPlatformTest,
+    : OffloadPlatformTest,
       ::testing::WithParamInterface<ol_platform_info_t> {};
 
 INSTANTIATE_TEST_SUITE_P(
diff --git a/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp b/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp
index d49cdb90d321a..f61bca0cf52f0 100644
--- a/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp
+++ b/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 #pragma once
 
+#include <unordered_map>
 #include <vector>
 
 // TODO: We could autogenerate these
diff --git a/offload/unittests/OffloadAPI/program/olCreateProgram.cpp b/offload/unittests/OffloadAPI/program/olCreateProgram.cpp
new file mode 100644
index 0000000000000..c28b8db287252
--- /dev/null
+++ b/offload/unittests/OffloadAPI/program/olCreateProgram.cpp
@@ -0,0 +1,27 @@
+//===------- Offload API tests - olCreateProgram --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olCreateProgramTest = OffloadDeviceTest;
+
+TEST_F(olCreateProgramTest, Success) {
+  // Load the prebuilt "foo" device binary for this platform's backend.
+  std::unique_ptr<llvm::MemoryBuffer> DeviceBin;
+  ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Platform, DeviceBin));
+  ASSERT_GT(DeviceBin->getBufferSize(), 0lu);
+
+  ol_program_handle_t Program = nullptr;
+  ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
+                                 DeviceBin->getBufferSize(), &Program));
+  ASSERT_NE(Program, nullptr);
+
+  ASSERT_SUCCESS(olReleaseProgram(Program));
+}
diff --git a/offload/unittests/OffloadAPI/program/olReleaseProgram.cpp b/offload/unittests/OffloadAPI/program/olReleaseProgram.cpp
new file mode 100644
index 0000000000000..01a326b1ef5fe
--- /dev/null
+++ b/offload/unittests/OffloadAPI/program/olReleaseProgram.cpp
@@ -0,0 +1,22 @@
+//===------- Offload API tests - olReleaseProgram -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olReleaseProgramTest = OffloadProgramTest;
+
+TEST_F(olReleaseProgramTest, Success) {
+  ASSERT_SUCCESS(olRetainProgram(Program));
+  ASSERT_SUCCESS(olReleaseProgram(Program));
+}
+
+TEST_F(olReleaseProgramTest, InvalidNullHandle) {
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olReleaseProgram(nullptr));
+}
diff --git a/offload/unittests/OffloadAPI/program/olRetainProgram.cpp b/offload/unittests/OffloadAPI/program/olRetainProgram.cpp
new file mode 100644
index 0000000000000..9cd9cc9114258
--- /dev/null
+++ b/offload/unittests/OffloadAPI/program/olRetainProgram.cpp
@@ -0,0 +1,21 @@
+//===------- Offload API tests - olRetainProgram --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olRetainProgramTest = OffloadProgramTest;
+
+TEST_F(olRetainProgramTest, Success) {
+  ASSERT_SUCCESS(olRetainProgram(Program));
+}
+
+TEST_F(olRetainProgramTest, InvalidNullHandle) {
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olRetainProgram(nullptr));
+}
diff --git a/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp b/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp
new file mode 100644
index 0000000000000..0534debed055a
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp
@@ -0,0 +1,28 @@
+//===------- Offload API tests - olCreateQueue ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olCreateQueueTest = OffloadDeviceTest;
+
+TEST_F(olCreateQueueTest, Success) {
+  ol_queue_handle_t Queue = nullptr;
+  ASSERT_SUCCESS(olCreateQueue(Device, &Queue));
+  ASSERT_NE(Queue, nullptr);
+}
+
+TEST_F(olCreateQueueTest, InvalidNullHandleDevice) {
+  ol_queue_handle_t Queue = nullptr;
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olCreateQueue(nullptr, &Queue));
+}
+
+TEST_F(olCreateQueueTest, InvalidNullPointerQueue) {
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olCreateQueue(Device, nullptr));
+}
diff --git a/offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp b/offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp
new file mode 100644
index 0000000000000..723a73288acda
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp
@@ -0,0 +1,21 @@
+//===------- Offload API tests - olReleaseQueue ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olReleaseQueueTest = OffloadQueueTest;
+
+// TODO: When we can fetch queue info we can check the reference count is
+// changing in an expected way. In the meantime just check the entry point
+// doesn't blow up.
+TEST_F(olReleaseQueueTest, Success) {
+  ASSERT_SUCCESS(olRetainQueue(Queue));
+  ASSERT_SUCCESS(olReleaseQueue(Queue));
+}
diff --git a/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp b/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp
new file mode 100644
index 0000000000000..c6fb812b28151
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp
@@ -0,0 +1,18 @@
+//===------- Offload API tests - olRetainQueue ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olRetainQueueTest = OffloadQueueTest;
+
+// TODO: When we can fetch queue info we can check the reference count is
+// changing in the expected way. In the meantime just check the entry point
+// doesn't blow up.
+TEST_F(olRetainQueueTest, Success) { ASSERT_SUCCESS(olRetainQueue(Queue)); }
diff --git a/offload/unittests/OffloadAPI/queue/olWaitQueue.cpp b/offload/unittests/OffloadAPI/queue/olWaitQueue.cpp
new file mode 100644
index 0000000000000..07ef774583ae0
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olWaitQueue.cpp
@@ -0,0 +1,17 @@
+//===------- Offload API tests - olWaitQueue ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olWaitQueueTest = OffloadQueueTest;
+
+TEST_F(olWaitQueueTest, SuccessEmptyQueue) {
+  ASSERT_SUCCESS(olWaitQueue(Queue));
+}

>From 742b3e00f8e94ca0932ce6e21610c250828fa92c Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Fri, 7 Mar 2025 13:21:39 +0000
Subject: [PATCH 2/9] Remove reference counting from the API

---
 offload/liboffload/API/Event.td               |  14 +-
 offload/liboffload/API/Kernel.td              |  18 +-
 offload/liboffload/API/Program.td             |  18 +-
 offload/liboffload/API/Queue.td               |  18 +-
 .../liboffload/include/generated/OffloadAPI.h | 191 +++-------------
 .../include/generated/OffloadEntryPoints.inc  | 209 +++---------------
 .../include/generated/OffloadFuncs.inc        |  24 +-
 .../generated/OffloadImplFuncDecls.inc        |  16 +-
 .../include/generated/OffloadPrint.hpp        |  40 +---
 offload/liboffload/src/OffloadImpl.cpp        |  90 ++++----
 offload/unittests/OffloadAPI/CMakeLists.txt   |   9 +-
 .../unittests/OffloadAPI/common/Fixtures.hpp  |   6 +-
 .../enqueue/olEnqueueKernelLaunch.cpp         |   4 +-
 .../OffloadAPI/kernel/olCreateKernel.cpp      |   2 +-
 ...lReleaseKernel.cpp => olDestroyKernel.cpp} |  12 +-
 .../OffloadAPI/program/olCreateProgram.cpp    |   2 +-
 ...eleaseProgram.cpp => olDestroyProgram.cpp} |  12 +-
 .../OffloadAPI/program/olRetainProgram.cpp    |  21 --
 .../olDestroyQueue.cpp}                       |  13 +-
 .../OffloadAPI/queue/olReleaseQueue.cpp       |  21 --
 .../OffloadAPI/queue/olRetainQueue.cpp        |  18 --
 21 files changed, 158 insertions(+), 600 deletions(-)
 rename offload/unittests/OffloadAPI/kernel/{olReleaseKernel.cpp => olDestroyKernel.cpp} (62%)
 rename offload/unittests/OffloadAPI/program/{olReleaseProgram.cpp => olDestroyProgram.cpp} (61%)
 delete mode 100644 offload/unittests/OffloadAPI/program/olRetainProgram.cpp
 rename offload/unittests/OffloadAPI/{kernel/olRetainKernel.cpp => queue/olDestroyQueue.cpp} (55%)
 delete mode 100644 offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp
 delete mode 100644 offload/unittests/OffloadAPI/queue/olRetainQueue.cpp

diff --git a/offload/liboffload/API/Event.td b/offload/liboffload/API/Event.td
index 066704efba5b6..c9f79159cf263 100644
--- a/offload/liboffload/API/Event.td
+++ b/offload/liboffload/API/Event.td
@@ -11,18 +11,8 @@
 //===----------------------------------------------------------------------===//
 
 def : Function {
-    let name = "olRetainEvent";
-    let desc = "Increment the event's reference count.";
-    let details = [];
-    let params = [
-        Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
-    ];
-    let returns = [];
-}
-
-def : Function {
-    let name = "olReleaseEvent";
-    let desc = "Decrement the event's reference count, and free it if the reference count reaches 0.";
+    let name = "olDestroyEvent";
+    let desc = "Destroy the event and free all underlying resources.";
     let details = [];
     let params = [
         Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index 3620e02c3b7bf..ef831f7c29801 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -13,9 +13,7 @@
 def : Function {
     let name = "olCreateKernel";
     let desc = "Create a kernel from the function identified by `KernelName` in the given program.";
-    let details = [
-        "The created kernel has an initial reference count of 1."
-    ];
+    let details = [];
     let params = [
         Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>,
         Param<"const char*", "KernelName", "name of the kernel entry point in the program", PARAM_IN>,
@@ -25,18 +23,8 @@ def : Function {
 }
 
 def : Function {
-    let name = "olRetainKernel";
-    let desc = "Increment the kernel's reference count.";
-    let details = [];
-    let params = [
-        Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
-    ];
-    let returns = [];
-}
-
-def : Function {
-    let name = "olReleaseKernel";
-    let desc = "Decrement the kernel's reference count, and free it if the reference count reaches 0.";
+    let name = "olDestroyKernel";
+    let desc = "Destroy the kernel and free all underlying resources.";
     let details = [];
     let params = [
         Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
index df644934f8b2f..8c88fe6e21e6a 100644
--- a/offload/liboffload/API/Program.td
+++ b/offload/liboffload/API/Program.td
@@ -13,9 +13,7 @@
 def : Function {
     let name = "olCreateProgram";
     let desc = "Create a program for the device from the binary image pointed to by `ProgData`.";
-    let details = [
-        "The created program has an initial reference count of 1."
-    ];
+    let details = [];
     let params = [
         Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
         Param<"const void*", "ProgData", "pointer to the program binary data", PARAM_IN>,
@@ -26,18 +24,8 @@ def : Function {
 }
 
 def : Function {
-    let name = "olRetainProgram";
-    let desc = "Increment the program's reference count.";
-    let details = [];
-    let params = [
-        Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>
-    ];
-    let returns = [];
-}
-
-def : Function {
-    let name = "olReleaseProgram";
-    let desc = "Decrement the program's reference count, and free it if the reference count reaches 0.";
+    let name = "olDestroyProgram";
+    let desc = "Destroy the program and free all underlying resources.";
     let details = [];
     let params = [
         Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index 6725d6737ce2a..b5bb619c57514 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -13,9 +13,7 @@
 def : Function {
     let name = "olCreateQueue";
     let desc = "Create a queue for the given device.";
-    let details = [
-        "The created queue has an initial reference count of 1."
-    ];
+    let details = [];
     let params = [
         Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
         Param<"ol_queue_handle_t*", "Queue", "output pointer for the created queue", PARAM_OUT>
@@ -24,18 +22,8 @@ def : Function {
 }
 
 def : Function {
-    let name = "olRetainQueue";
-    let desc = "Increment the queue's reference count.";
-    let details = [];
-    let params = [
-        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
-    ];
-    let returns = [];
-}
-
-def : Function {
-    let name = "olReleaseQueue";
-    let desc = "Decrement the queues's reference count, and free it if the reference count reaches 0.";
+    let name = "olDestroyQueue";
+    let desc = "Destroy the queue and free all underlying resources.";
     let details = [];
     let params = [
         Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index dc291907570f1..7d2b90cc27a31 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -557,7 +557,6 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemFree(
 /// @brief Create a queue for the given device.
 ///
 /// @details
-///    - The created queue has an initial reference count of 1.
 ///
 /// @returns
 ///     - ::OL_RESULT_SUCCESS
@@ -574,7 +573,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateQueue(
     ol_queue_handle_t *Queue);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Increment the queue's reference count.
+/// @brief Destroy the queue and free all underlying resources.
 ///
 /// @details
 ///
@@ -585,24 +584,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateQueue(
 ///     - ::OL_ERRC_INVALID_NULL_HANDLE
 ///         + `NULL == Queue`
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
-OL_APIEXPORT ol_result_t OL_APICALL olRetainQueue(
-    // [in] handle of the queue
-    ol_queue_handle_t Queue);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Decrement the queues's reference count, and free it if the reference
-/// count reaches 0.
-///
-/// @details
-///
-/// @returns
-///     - ::OL_RESULT_SUCCESS
-///     - ::OL_ERRC_UNINITIALIZED
-///     - ::OL_ERRC_DEVICE_LOST
-///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == Queue`
-///     - ::OL_ERRC_INVALID_NULL_POINTER
-OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueue(
+OL_APIEXPORT ol_result_t OL_APICALL olDestroyQueue(
     // [in] handle of the queue
     ol_queue_handle_t Queue);
 
@@ -623,24 +605,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitQueue(
     ol_queue_handle_t Queue);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Increment the event's reference count.
-///
-/// @details
-///
-/// @returns
-///     - ::OL_RESULT_SUCCESS
-///     - ::OL_ERRC_UNINITIALIZED
-///     - ::OL_ERRC_DEVICE_LOST
-///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == Event`
-///     - ::OL_ERRC_INVALID_NULL_POINTER
-OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(
-    // [in] handle of the event
-    ol_event_handle_t Event);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Decrement the event's reference count, and free it if the reference
-/// count reaches 0.
+/// @brief Destroy the event and free all underlying resources.
 ///
 /// @details
 ///
@@ -651,7 +616,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(
 ///     - ::OL_ERRC_INVALID_NULL_HANDLE
 ///         + `NULL == Event`
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
-OL_APIEXPORT ol_result_t OL_APICALL olReleaseEvent(
+OL_APIEXPORT ol_result_t OL_APICALL olDestroyEvent(
     // [in] handle of the event
     ol_event_handle_t Event);
 
@@ -753,7 +718,6 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
 /// `ProgData`.
 ///
 /// @details
-///    - The created program has an initial reference count of 1.
 ///
 /// @returns
 ///     - ::OL_RESULT_SUCCESS
@@ -775,24 +739,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateProgram(
     ol_program_handle_t *Program);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Increment the program's reference count.
-///
-/// @details
-///
-/// @returns
-///     - ::OL_RESULT_SUCCESS
-///     - ::OL_ERRC_UNINITIALIZED
-///     - ::OL_ERRC_DEVICE_LOST
-///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == Program`
-///     - ::OL_ERRC_INVALID_NULL_POINTER
-OL_APIEXPORT ol_result_t OL_APICALL olRetainProgram(
-    // [in] handle of the program
-    ol_program_handle_t Program);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Decrement the program's reference count, and free it if the reference
-/// count reaches 0.
+/// @brief Destroy the program and free all underlying resources.
 ///
 /// @details
 ///
@@ -803,7 +750,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olRetainProgram(
 ///     - ::OL_ERRC_INVALID_NULL_HANDLE
 ///         + `NULL == Program`
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
-OL_APIEXPORT ol_result_t OL_APICALL olReleaseProgram(
+OL_APIEXPORT ol_result_t OL_APICALL olDestroyProgram(
     // [in] handle of the program
     ol_program_handle_t Program);
 
@@ -812,7 +759,6 @@ OL_APIEXPORT ol_result_t OL_APICALL olReleaseProgram(
 /// given program.
 ///
 /// @details
-///    - The created kernel has an initial reference count of 1.
 ///
 /// @returns
 ///     - ::OL_RESULT_SUCCESS
@@ -832,24 +778,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(
     ol_kernel_handle_t *Kernel);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Increment the kernel's reference count.
-///
-/// @details
-///
-/// @returns
-///     - ::OL_RESULT_SUCCESS
-///     - ::OL_ERRC_UNINITIALIZED
-///     - ::OL_ERRC_DEVICE_LOST
-///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == Kernel`
-///     - ::OL_ERRC_INVALID_NULL_POINTER
-OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(
-    // [in] handle of the kernel
-    ol_kernel_handle_t Kernel);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Decrement the kernel's reference count, and free it if the reference
-/// count reaches 0.
+/// @brief Destroy the kernel and free all underlying resources.
 ///
 /// @details
 ///
@@ -860,7 +789,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(
 ///     - ::OL_ERRC_INVALID_NULL_HANDLE
 ///         + `NULL == Kernel`
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
-OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernel(
+OL_APIEXPORT ol_result_t OL_APICALL olDestroyKernel(
     // [in] handle of the kernel
     ol_kernel_handle_t Kernel);
 
@@ -969,18 +898,11 @@ typedef struct ol_create_queue_params_t {
 } ol_create_queue_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olRetainQueue
-/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_retain_queue_params_t {
-  ol_queue_handle_t *pQueue;
-} ol_retain_queue_params_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olReleaseQueue
+/// @brief Function parameters for olDestroyQueue
 /// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_release_queue_params_t {
+typedef struct ol_destroy_queue_params_t {
   ol_queue_handle_t *pQueue;
-} ol_release_queue_params_t;
+} ol_destroy_queue_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olWaitQueue
@@ -990,18 +912,11 @@ typedef struct ol_wait_queue_params_t {
 } ol_wait_queue_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olRetainEvent
+/// @brief Function parameters for olDestroyEvent
 /// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_retain_event_params_t {
+typedef struct ol_destroy_event_params_t {
   ol_event_handle_t *pEvent;
-} ol_retain_event_params_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olReleaseEvent
-/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_release_event_params_t {
-  ol_event_handle_t *pEvent;
-} ol_release_event_params_t;
+} ol_destroy_event_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olWaitEvent
@@ -1046,18 +961,11 @@ typedef struct ol_create_program_params_t {
 } ol_create_program_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olRetainProgram
-/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_retain_program_params_t {
-  ol_program_handle_t *pProgram;
-} ol_retain_program_params_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olReleaseProgram
+/// @brief Function parameters for olDestroyProgram
 /// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_release_program_params_t {
+typedef struct ol_destroy_program_params_t {
   ol_program_handle_t *pProgram;
-} ol_release_program_params_t;
+} ol_destroy_program_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olCreateKernel
@@ -1069,18 +977,11 @@ typedef struct ol_create_kernel_params_t {
 } ol_create_kernel_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olRetainKernel
-/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_retain_kernel_params_t {
-  ol_kernel_handle_t *pKernel;
-} ol_retain_kernel_params_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olReleaseKernel
+/// @brief Function parameters for olDestroyKernel
 /// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_release_kernel_params_t {
+typedef struct ol_destroy_kernel_params_t {
   ol_kernel_handle_t *pKernel;
-} ol_release_kernel_params_t;
+} ol_destroy_kernel_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Variant of olInit that also sets source code location information
@@ -1187,17 +1088,10 @@ olCreateQueueWithCodeLoc(ol_device_handle_t Device, ol_queue_handle_t *Queue,
                          ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olRetainQueue that also sets source code location
+/// @brief Variant of olDestroyQueue that also sets source code location
 /// information
-/// @details See also ::olRetainQueue
-OL_APIEXPORT ol_result_t OL_APICALL olRetainQueueWithCodeLoc(
-    ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olReleaseQueue that also sets source code location
-/// information
-/// @details See also ::olReleaseQueue
-OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueueWithCodeLoc(
+/// @details See also ::olDestroyQueue
+OL_APIEXPORT ol_result_t OL_APICALL olDestroyQueueWithCodeLoc(
     ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -1208,17 +1102,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitQueueWithCodeLoc(
     ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olRetainEvent that also sets source code location
-/// information
-/// @details See also ::olRetainEvent
-OL_APIEXPORT ol_result_t OL_APICALL olRetainEventWithCodeLoc(
-    ol_event_handle_t Event, ol_code_location_t *CodeLocation);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olReleaseEvent that also sets source code location
+/// @brief Variant of olDestroyEvent that also sets source code location
 /// information
-/// @details See also ::olReleaseEvent
-OL_APIEXPORT ol_result_t OL_APICALL olReleaseEventWithCodeLoc(
+/// @details See also ::olDestroyEvent
+OL_APIEXPORT ol_result_t OL_APICALL olDestroyEventWithCodeLoc(
     ol_event_handle_t Event, ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -1256,17 +1143,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateProgramWithCodeLoc(
     ol_program_handle_t *Program, ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olRetainProgram that also sets source code location
+/// @brief Variant of olDestroyProgram that also sets source code location
 /// information
-/// @details See also ::olRetainProgram
-OL_APIEXPORT ol_result_t OL_APICALL olRetainProgramWithCodeLoc(
-    ol_program_handle_t Program, ol_code_location_t *CodeLocation);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olReleaseProgram that also sets source code location
-/// information
-/// @details See also ::olReleaseProgram
-OL_APIEXPORT ol_result_t OL_APICALL olReleaseProgramWithCodeLoc(
+/// @details See also ::olDestroyProgram
+OL_APIEXPORT ol_result_t OL_APICALL olDestroyProgramWithCodeLoc(
     ol_program_handle_t Program, ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -1278,17 +1158,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateKernelWithCodeLoc(
     ol_kernel_handle_t *Kernel, ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olRetainKernel that also sets source code location
-/// information
-/// @details See also ::olRetainKernel
-OL_APIEXPORT ol_result_t OL_APICALL olRetainKernelWithCodeLoc(
-    ol_kernel_handle_t Kernel, ol_code_location_t *CodeLocation);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olReleaseKernel that also sets source code location
+/// @brief Variant of olDestroyKernel that also sets source code location
 /// information
-/// @details See also ::olReleaseKernel
-OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernelWithCodeLoc(
+/// @details See also ::olDestroyKernel
+OL_APIEXPORT ol_result_t OL_APICALL olDestroyKernelWithCodeLoc(
     ol_kernel_handle_t Kernel, ol_code_location_t *CodeLocation);
 
 #if defined(__cplusplus)
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 97cd2eb552c96..872f551edcb74 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -618,24 +618,24 @@ ol_result_t olCreateQueueWithCodeLoc(ol_device_handle_t Device,
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olRetainQueue_val(ol_queue_handle_t Queue) {
+ol_impl_result_t olDestroyQueue_val(ol_queue_handle_t Queue) {
   if (offloadConfig().ValidationEnabled) {
     if (NULL == Queue) {
       return OL_ERRC_INVALID_NULL_HANDLE;
     }
   }
 
-  return llvm::offload::olRetainQueue_impl(Queue);
+  return llvm::offload::olDestroyQueue_impl(Queue);
 }
-OL_APIEXPORT ol_result_t OL_APICALL olRetainQueue(ol_queue_handle_t Queue) {
+OL_APIEXPORT ol_result_t OL_APICALL olDestroyQueue(ol_queue_handle_t Queue) {
   if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olRetainQueue";
+    llvm::errs() << "---> olDestroyQueue";
   }
 
-  ol_result_t Result = olRetainQueue_val(Queue);
+  ol_result_t Result = olDestroyQueue_val(Queue);
 
   if (offloadConfig().TracingEnabled) {
-    ol_retain_queue_params_t Params = {&Queue};
+    ol_destroy_queue_params_t Params = {&Queue};
     llvm::errs() << "(" << &Params << ")";
     llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
@@ -644,46 +644,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olRetainQueue(ol_queue_handle_t Queue) {
   }
   return Result;
 }
-ol_result_t olRetainQueueWithCodeLoc(ol_queue_handle_t Queue,
-                                     ol_code_location_t *CodeLocation) {
-  currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olRetainQueue(Queue);
-
-  currentCodeLocation() = nullptr;
-  return Result;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olReleaseQueue_val(ol_queue_handle_t Queue) {
-  if (offloadConfig().ValidationEnabled) {
-    if (NULL == Queue) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
-    }
-  }
-
-  return llvm::offload::olReleaseQueue_impl(Queue);
-}
-OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueue(ol_queue_handle_t Queue) {
-  if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olReleaseQueue";
-  }
-
-  ol_result_t Result = olReleaseQueue_val(Queue);
-
-  if (offloadConfig().TracingEnabled) {
-    ol_release_queue_params_t Params = {&Queue};
-    llvm::errs() << "(" << &Params << ")";
-    llvm::errs() << "-> " << Result << "\n";
-    if (Result && Result->Details) {
-      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
-    }
-  }
-  return Result;
-}
-ol_result_t olReleaseQueueWithCodeLoc(ol_queue_handle_t Queue,
+ol_result_t olDestroyQueueWithCodeLoc(ol_queue_handle_t Queue,
                                       ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olReleaseQueue(Queue);
+  ol_result_t Result = ::olDestroyQueue(Queue);
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -726,24 +690,24 @@ ol_result_t olWaitQueueWithCodeLoc(ol_queue_handle_t Queue,
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olRetainEvent_val(ol_event_handle_t Event) {
+ol_impl_result_t olDestroyEvent_val(ol_event_handle_t Event) {
   if (offloadConfig().ValidationEnabled) {
     if (NULL == Event) {
       return OL_ERRC_INVALID_NULL_HANDLE;
     }
   }
 
-  return llvm::offload::olRetainEvent_impl(Event);
+  return llvm::offload::olDestroyEvent_impl(Event);
 }
-OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(ol_event_handle_t Event) {
+OL_APIEXPORT ol_result_t OL_APICALL olDestroyEvent(ol_event_handle_t Event) {
   if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olRetainEvent";
+    llvm::errs() << "---> olDestroyEvent";
   }
 
-  ol_result_t Result = olRetainEvent_val(Event);
+  ol_result_t Result = olDestroyEvent_val(Event);
 
   if (offloadConfig().TracingEnabled) {
-    ol_retain_event_params_t Params = {&Event};
+    ol_destroy_event_params_t Params = {&Event};
     llvm::errs() << "(" << &Params << ")";
     llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
@@ -752,46 +716,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(ol_event_handle_t Event) {
   }
   return Result;
 }
-ol_result_t olRetainEventWithCodeLoc(ol_event_handle_t Event,
-                                     ol_code_location_t *CodeLocation) {
-  currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olRetainEvent(Event);
-
-  currentCodeLocation() = nullptr;
-  return Result;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olReleaseEvent_val(ol_event_handle_t Event) {
-  if (offloadConfig().ValidationEnabled) {
-    if (NULL == Event) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
-    }
-  }
-
-  return llvm::offload::olReleaseEvent_impl(Event);
-}
-OL_APIEXPORT ol_result_t OL_APICALL olReleaseEvent(ol_event_handle_t Event) {
-  if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olReleaseEvent";
-  }
-
-  ol_result_t Result = olReleaseEvent_val(Event);
-
-  if (offloadConfig().TracingEnabled) {
-    ol_release_event_params_t Params = {&Event};
-    llvm::errs() << "(" << &Params << ")";
-    llvm::errs() << "-> " << Result << "\n";
-    if (Result && Result->Details) {
-      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
-    }
-  }
-  return Result;
-}
-ol_result_t olReleaseEventWithCodeLoc(ol_event_handle_t Event,
+ol_result_t olDestroyEventWithCodeLoc(ol_event_handle_t Event,
                                       ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olReleaseEvent(Event);
+  ol_result_t Result = ::olDestroyEvent(Event);
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -1023,25 +951,25 @@ ol_result_t olCreateProgramWithCodeLoc(ol_device_handle_t Device,
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olRetainProgram_val(ol_program_handle_t Program) {
+ol_impl_result_t olDestroyProgram_val(ol_program_handle_t Program) {
   if (offloadConfig().ValidationEnabled) {
     if (NULL == Program) {
       return OL_ERRC_INVALID_NULL_HANDLE;
     }
   }
 
-  return llvm::offload::olRetainProgram_impl(Program);
+  return llvm::offload::olDestroyProgram_impl(Program);
 }
 OL_APIEXPORT ol_result_t OL_APICALL
-olRetainProgram(ol_program_handle_t Program) {
+olDestroyProgram(ol_program_handle_t Program) {
   if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olRetainProgram";
+    llvm::errs() << "---> olDestroyProgram";
   }
 
-  ol_result_t Result = olRetainProgram_val(Program);
+  ol_result_t Result = olDestroyProgram_val(Program);
 
   if (offloadConfig().TracingEnabled) {
-    ol_retain_program_params_t Params = {&Program};
+    ol_destroy_program_params_t Params = {&Program};
     llvm::errs() << "(" << &Params << ")";
     llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
@@ -1050,47 +978,10 @@ olRetainProgram(ol_program_handle_t Program) {
   }
   return Result;
 }
-ol_result_t olRetainProgramWithCodeLoc(ol_program_handle_t Program,
-                                       ol_code_location_t *CodeLocation) {
-  currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olRetainProgram(Program);
-
-  currentCodeLocation() = nullptr;
-  return Result;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olReleaseProgram_val(ol_program_handle_t Program) {
-  if (offloadConfig().ValidationEnabled) {
-    if (NULL == Program) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
-    }
-  }
-
-  return llvm::offload::olReleaseProgram_impl(Program);
-}
-OL_APIEXPORT ol_result_t OL_APICALL
-olReleaseProgram(ol_program_handle_t Program) {
-  if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olReleaseProgram";
-  }
-
-  ol_result_t Result = olReleaseProgram_val(Program);
-
-  if (offloadConfig().TracingEnabled) {
-    ol_release_program_params_t Params = {&Program};
-    llvm::errs() << "(" << &Params << ")";
-    llvm::errs() << "-> " << Result << "\n";
-    if (Result && Result->Details) {
-      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
-    }
-  }
-  return Result;
-}
-ol_result_t olReleaseProgramWithCodeLoc(ol_program_handle_t Program,
+ol_result_t olDestroyProgramWithCodeLoc(ol_program_handle_t Program,
                                         ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olReleaseProgram(Program);
+  ol_result_t Result = ::olDestroyProgram(Program);
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -1147,60 +1038,24 @@ ol_result_t olCreateKernelWithCodeLoc(ol_program_handle_t Program,
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olRetainKernel_val(ol_kernel_handle_t Kernel) {
-  if (offloadConfig().ValidationEnabled) {
-    if (NULL == Kernel) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
-    }
-  }
-
-  return llvm::offload::olRetainKernel_impl(Kernel);
-}
-OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(ol_kernel_handle_t Kernel) {
-  if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olRetainKernel";
-  }
-
-  ol_result_t Result = olRetainKernel_val(Kernel);
-
-  if (offloadConfig().TracingEnabled) {
-    ol_retain_kernel_params_t Params = {&Kernel};
-    llvm::errs() << "(" << &Params << ")";
-    llvm::errs() << "-> " << Result << "\n";
-    if (Result && Result->Details) {
-      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
-    }
-  }
-  return Result;
-}
-ol_result_t olRetainKernelWithCodeLoc(ol_kernel_handle_t Kernel,
-                                      ol_code_location_t *CodeLocation) {
-  currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olRetainKernel(Kernel);
-
-  currentCodeLocation() = nullptr;
-  return Result;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olReleaseKernel_val(ol_kernel_handle_t Kernel) {
+ol_impl_result_t olDestroyKernel_val(ol_kernel_handle_t Kernel) {
   if (offloadConfig().ValidationEnabled) {
     if (NULL == Kernel) {
       return OL_ERRC_INVALID_NULL_HANDLE;
     }
   }
 
-  return llvm::offload::olReleaseKernel_impl(Kernel);
+  return llvm::offload::olDestroyKernel_impl(Kernel);
 }
-OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernel(ol_kernel_handle_t Kernel) {
+OL_APIEXPORT ol_result_t OL_APICALL olDestroyKernel(ol_kernel_handle_t Kernel) {
   if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olReleaseKernel";
+    llvm::errs() << "---> olDestroyKernel";
   }
 
-  ol_result_t Result = olReleaseKernel_val(Kernel);
+  ol_result_t Result = olDestroyKernel_val(Kernel);
 
   if (offloadConfig().TracingEnabled) {
-    ol_release_kernel_params_t Params = {&Kernel};
+    ol_destroy_kernel_params_t Params = {&Kernel};
     llvm::errs() << "(" << &Params << ")";
     llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
@@ -1209,10 +1064,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernel(ol_kernel_handle_t Kernel) {
   }
   return Result;
 }
-ol_result_t olReleaseKernelWithCodeLoc(ol_kernel_handle_t Kernel,
+ol_result_t olDestroyKernelWithCodeLoc(ol_kernel_handle_t Kernel,
                                        ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olReleaseKernel(Kernel);
+  ol_result_t Result = ::olDestroyKernel(Kernel);
 
   currentCodeLocation() = nullptr;
   return Result;
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index c504de68a778e..46f935e551454 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -24,20 +24,16 @@ OFFLOAD_FUNC(olGetHostDevice)
 OFFLOAD_FUNC(olMemAlloc)
 OFFLOAD_FUNC(olMemFree)
 OFFLOAD_FUNC(olCreateQueue)
-OFFLOAD_FUNC(olRetainQueue)
-OFFLOAD_FUNC(olReleaseQueue)
+OFFLOAD_FUNC(olDestroyQueue)
 OFFLOAD_FUNC(olWaitQueue)
-OFFLOAD_FUNC(olRetainEvent)
-OFFLOAD_FUNC(olReleaseEvent)
+OFFLOAD_FUNC(olDestroyEvent)
 OFFLOAD_FUNC(olWaitEvent)
 OFFLOAD_FUNC(olEnqueueMemcpy)
 OFFLOAD_FUNC(olEnqueueKernelLaunch)
 OFFLOAD_FUNC(olCreateProgram)
-OFFLOAD_FUNC(olRetainProgram)
-OFFLOAD_FUNC(olReleaseProgram)
+OFFLOAD_FUNC(olDestroyProgram)
 OFFLOAD_FUNC(olCreateKernel)
-OFFLOAD_FUNC(olRetainKernel)
-OFFLOAD_FUNC(olReleaseKernel)
+OFFLOAD_FUNC(olDestroyKernel)
 OFFLOAD_FUNC(olInitWithCodeLoc)
 OFFLOAD_FUNC(olShutDownWithCodeLoc)
 OFFLOAD_FUNC(olGetPlatformWithCodeLoc)
@@ -52,19 +48,15 @@ OFFLOAD_FUNC(olGetHostDeviceWithCodeLoc)
 OFFLOAD_FUNC(olMemAllocWithCodeLoc)
 OFFLOAD_FUNC(olMemFreeWithCodeLoc)
 OFFLOAD_FUNC(olCreateQueueWithCodeLoc)
-OFFLOAD_FUNC(olRetainQueueWithCodeLoc)
-OFFLOAD_FUNC(olReleaseQueueWithCodeLoc)
+OFFLOAD_FUNC(olDestroyQueueWithCodeLoc)
 OFFLOAD_FUNC(olWaitQueueWithCodeLoc)
-OFFLOAD_FUNC(olRetainEventWithCodeLoc)
-OFFLOAD_FUNC(olReleaseEventWithCodeLoc)
+OFFLOAD_FUNC(olDestroyEventWithCodeLoc)
 OFFLOAD_FUNC(olWaitEventWithCodeLoc)
 OFFLOAD_FUNC(olEnqueueMemcpyWithCodeLoc)
 OFFLOAD_FUNC(olEnqueueKernelLaunchWithCodeLoc)
 OFFLOAD_FUNC(olCreateProgramWithCodeLoc)
-OFFLOAD_FUNC(olRetainProgramWithCodeLoc)
-OFFLOAD_FUNC(olReleaseProgramWithCodeLoc)
+OFFLOAD_FUNC(olDestroyProgramWithCodeLoc)
 OFFLOAD_FUNC(olCreateKernelWithCodeLoc)
-OFFLOAD_FUNC(olRetainKernelWithCodeLoc)
-OFFLOAD_FUNC(olReleaseKernelWithCodeLoc)
+OFFLOAD_FUNC(olDestroyKernelWithCodeLoc)
 
 #undef OFFLOAD_FUNC
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index d6be874613cf5..085d7fb7bf674 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -49,15 +49,11 @@ ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
 ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
                                     ol_queue_handle_t *Queue);
 
-ol_impl_result_t olRetainQueue_impl(ol_queue_handle_t Queue);
-
-ol_impl_result_t olReleaseQueue_impl(ol_queue_handle_t Queue);
+ol_impl_result_t olDestroyQueue_impl(ol_queue_handle_t Queue);
 
 ol_impl_result_t olWaitQueue_impl(ol_queue_handle_t Queue);
 
-ol_impl_result_t olRetainEvent_impl(ol_event_handle_t Event);
-
-ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event);
+ol_impl_result_t olDestroyEvent_impl(ol_event_handle_t Event);
 
 ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event);
 
@@ -77,14 +73,10 @@ ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device,
                                       const void *ProgData, size_t ProgDataSize,
                                       ol_program_handle_t *Program);
 
-ol_impl_result_t olRetainProgram_impl(ol_program_handle_t Program);
-
-ol_impl_result_t olReleaseProgram_impl(ol_program_handle_t Program);
+ol_impl_result_t olDestroyProgram_impl(ol_program_handle_t Program);
 
 ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
                                      const char *KernelName,
                                      ol_kernel_handle_t *Kernel);
 
-ol_impl_result_t olRetainKernel_impl(ol_kernel_handle_t Kernel);
-
-ol_impl_result_t olReleaseKernel_impl(ol_kernel_handle_t Kernel);
+ol_impl_result_t olDestroyKernel_impl(ol_kernel_handle_t Kernel);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 662c3e462d3d4..dd0dd466bd7d7 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -536,15 +536,7 @@ operator<<(llvm::raw_ostream &os,
 
 inline llvm::raw_ostream &
 operator<<(llvm::raw_ostream &os,
-           const struct ol_retain_queue_params_t *params) {
-  os << ".Queue = ";
-  printPtr(os, *params->pQueue);
-  return os;
-}
-
-inline llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os,
-           const struct ol_release_queue_params_t *params) {
+           const struct ol_destroy_queue_params_t *params) {
   os << ".Queue = ";
   printPtr(os, *params->pQueue);
   return os;
@@ -559,15 +551,7 @@ operator<<(llvm::raw_ostream &os, const struct ol_wait_queue_params_t *params) {
 
 inline llvm::raw_ostream &
 operator<<(llvm::raw_ostream &os,
-           const struct ol_retain_event_params_t *params) {
-  os << ".Event = ";
-  printPtr(os, *params->pEvent);
-  return os;
-}
-
-inline llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os,
-           const struct ol_release_event_params_t *params) {
+           const struct ol_destroy_event_params_t *params) {
   os << ".Event = ";
   printPtr(os, *params->pEvent);
   return os;
@@ -648,15 +632,7 @@ operator<<(llvm::raw_ostream &os,
 
 inline llvm::raw_ostream &
 operator<<(llvm::raw_ostream &os,
-           const struct ol_retain_program_params_t *params) {
-  os << ".Program = ";
-  printPtr(os, *params->pProgram);
-  return os;
-}
-
-inline llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os,
-           const struct ol_release_program_params_t *params) {
+           const struct ol_destroy_program_params_t *params) {
   os << ".Program = ";
   printPtr(os, *params->pProgram);
   return os;
@@ -678,15 +654,7 @@ operator<<(llvm::raw_ostream &os,
 
 inline llvm::raw_ostream &
 operator<<(llvm::raw_ostream &os,
-           const struct ol_retain_kernel_params_t *params) {
-  os << ".Kernel = ";
-  printPtr(os, *params->pKernel);
-  return os;
-}
-
-inline llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os,
-           const struct ol_release_kernel_params_t *params) {
+           const struct ol_destroy_kernel_params_t *params) {
   os << ".Kernel = ";
   printPtr(os, *params->pKernel);
   return os;
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 9e81d905d0216..2266a7fa457c7 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -40,39 +40,50 @@ using namespace llvm::omp::target::plugin;
 // Handle type definitions. Ideally these would be 1:1 with the plugins, but
 // we add some additional data here for now to avoid churn in the plugin
 // interface.
-
-struct RefCounted {
-  std::atomic_uint32_t RefCount;
-};
-
 struct ol_device_impl_t {
+  ol_device_impl_t(int DeviceNum, GenericDeviceTy *Device,
+                   ol_platform_handle_t Platform)
+      : DeviceNum(DeviceNum), Device(Device), Platform(Platform) {}
   int DeviceNum;
   GenericDeviceTy *Device;
   ol_platform_handle_t Platform;
 };
 
 struct ol_platform_impl_t {
+  ol_platform_impl_t(std::unique_ptr<GenericPluginTy> Plugin,
+                     std::vector<ol_device_impl_t> Devices)
+      : Plugin(std::move(Plugin)), Devices(Devices) {}
   std::unique_ptr<GenericPluginTy> Plugin;
   std::vector<ol_device_impl_t> Devices;
 };
 
-struct ol_queue_impl_t : RefCounted {
+struct ol_queue_impl_t {
+  ol_queue_impl_t(__tgt_async_info *AsyncInfo, ol_device_handle_t Device)
+      : AsyncInfo(AsyncInfo), Device(Device) {}
   __tgt_async_info *AsyncInfo;
   ol_device_handle_t Device;
 };
 
-struct ol_event_impl_t : RefCounted {
+struct ol_event_impl_t {
+  ol_event_impl_t(void *EventInfo, ol_queue_handle_t Queue)
+      : EventInfo(EventInfo), Queue(Queue) {}
+  ~ol_event_impl_t() { (void)Queue->Device->Device->destroyEvent(EventInfo); }
   void *EventInfo;
   ol_queue_handle_t Queue;
 };
 
-struct ol_program_impl_t : RefCounted {
+struct ol_program_impl_t {
+  ol_program_impl_t(plugin::DeviceImageTy *Image,
+                    std::unique_ptr<llvm::MemoryBuffer> ImageData,
+                    const __tgt_device_image &DeviceImage)
+      : Image(Image), ImageData(std::move(ImageData)),
+        DeviceImage(DeviceImage) {}
   plugin::DeviceImageTy *Image;
   std::unique_ptr<llvm::MemoryBuffer> ImageData;
   __tgt_device_image DeviceImage;
 };
 
-struct ol_kernel_impl_t : RefCounted {
+struct ol_kernel_impl_t {
   GenericKernelTy *KernelImpl;
 };
 
@@ -90,15 +101,8 @@ ol_device_handle_t HostDevice() {
   return &HostDeviceImpl;
 }
 
-template <typename HandleT> ol_impl_result_t olRetain(HandleT Handle) {
-  Handle->RefCount++;
-  return OL_SUCCESS;
-}
-
-template <typename HandleT> ol_impl_result_t olRelease(HandleT Handle) {
-  if (--Handle->RefCount == 0) {
-    delete Handle;
-  }
+template <typename HandleT> ol_impl_result_t olDestroy(HandleT Handle) {
+  delete Handle;
   return OL_SUCCESS;
 }
 
@@ -336,23 +340,17 @@ ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
 
 ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
                                     ol_queue_handle_t *Queue) {
-  auto CreatedQueue = std::make_unique<ol_queue_impl_t>();
+  auto CreatedQueue = std::make_unique<ol_queue_impl_t>(nullptr, Device);
   auto Err = Device->Device->initAsyncInfo(&(CreatedQueue->AsyncInfo));
   if (Err)
     return {OL_ERRC_UNKNOWN, "Could not initialize stream resource"};
 
-  CreatedQueue->Device = Device;
-  CreatedQueue->RefCount = 1;
   *Queue = CreatedQueue.release();
   return OL_SUCCESS;
 }
 
-ol_impl_result_t olRetainQueue_impl(ol_queue_handle_t Queue) {
-  return olRetain(Queue);
-}
-
-ol_impl_result_t olReleaseQueue_impl(ol_queue_handle_t Queue) {
-  return olRelease(Queue);
+ol_impl_result_t olDestroyQueue_impl(ol_queue_handle_t Queue) {
+  return olDestroy(Queue);
 }
 
 ol_impl_result_t olWaitQueue_impl(ol_queue_handle_t Queue) {
@@ -382,17 +380,12 @@ ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event) {
   return OL_SUCCESS;
 }
 
-ol_impl_result_t olRetainEvent_impl(ol_event_handle_t Event) {
-  return olRetain(Event);
-}
-
-ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event) {
-  return olRelease(Event);
+ol_impl_result_t olDestroyEvent_impl(ol_event_handle_t Event) {
+  return olDestroy(Event);
 }
 
 ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
-  auto EventImpl = std::make_unique<ol_event_impl_t>();
-  EventImpl->Queue = Queue;
+  auto EventImpl = std::make_unique<ol_event_impl_t>(nullptr, Queue);
   auto Res = Queue->Device->Device->createEvent(&EventImpl->EventInfo);
   if (Res)
     return nullptr;
@@ -447,13 +440,14 @@ ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device,
   auto ImageData = MemoryBuffer::getMemBufferCopy(
       StringRef(reinterpret_cast<const char *>(ProgData), ProgDataSize));
 
-  ol_program_handle_t Prog = new ol_program_impl_t();
-
-  Prog->DeviceImage = __tgt_device_image{
+  auto DeviceImage = __tgt_device_image{
       const_cast<char *>(ImageData->getBuffer().data()),
       const_cast<char *>(ImageData->getBuffer().data()) + ProgDataSize, nullptr,
       nullptr};
 
+  ol_program_handle_t Prog =
+      new ol_program_impl_t(nullptr, std::move(ImageData), DeviceImage);
+
   auto Res =
       Device->Device->loadBinary(Device->Device->Plugin, &Prog->DeviceImage);
   if (!Res) {
@@ -462,19 +456,14 @@ ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device,
   }
 
   Prog->Image = *Res;
-  Prog->RefCount = 1;
-  Prog->ImageData = std::move(ImageData);
+  // Prog->ImageData = std::move(ImageData);
   *Program = Prog;
 
   return OL_SUCCESS;
 }
 
-ol_impl_result_t olRetainProgram_impl(ol_program_handle_t Program) {
-  return olRetain(Program);
-}
-
-ol_impl_result_t olReleaseProgram_impl(ol_program_handle_t Program) {
-  return olRelease(Program);
+ol_impl_result_t olDestroyProgram_impl(ol_program_handle_t Program) {
+  return olDestroy(Program);
 }
 
 ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
@@ -491,19 +480,14 @@ ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
     return {OL_ERRC_UNKNOWN, "Could not initialize the kernel"};
 
   ol_kernel_handle_t CreatedKernel = new ol_kernel_impl_t();
-  CreatedKernel->RefCount = 1;
   CreatedKernel->KernelImpl = &*KernelImpl;
   *Kernel = CreatedKernel;
 
   return OL_SUCCESS;
 }
 
-ol_impl_result_t olRetainKernel_impl(ol_kernel_handle_t Kernel) {
-  return olRetain(Kernel);
-}
-
-ol_impl_result_t olReleaseKernel_impl(ol_kernel_handle_t Kernel) {
-  return olRelease(Kernel);
+ol_impl_result_t olDestroyKernel_impl(ol_kernel_handle_t Kernel) {
+  return olDestroy(Kernel);
 }
 
 ol_impl_result_t
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index a6fee7fe0d29d..d17ccc70fef36 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -16,18 +16,15 @@ add_libompt_unittest("offload.unittests"
     ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfoSize.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/queue/olCreateQueue.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/queue/olWaitQueue.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/queue/olReleaseQueue.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/queue/olRetainQueue.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/queue/olDestroyQueue.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemAlloc.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemFree.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueMemcpy.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueKernelLaunch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/program/olCreateProgram.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/program/olRetainProgram.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/program/olReleaseProgram.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/program/olDestroyProgram.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olCreateKernel.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olReleaseKernel.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olRetainKernel.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olDestroyKernel.cpp
     )
 add_dependencies("offload.unittests" ${PLUGINS_TEST_COMMON} LibomptUnitTestsDeviceBins)
 target_compile_definitions("offload.unittests" PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp
index c08ac716bff42..ea26b85803272 100644
--- a/offload/unittests/OffloadAPI/common/Fixtures.hpp
+++ b/offload/unittests/OffloadAPI/common/Fixtures.hpp
@@ -84,7 +84,7 @@ struct OffloadProgramTest : OffloadDeviceTest {
 
   void TearDown() override {
     if (Program) {
-      olReleaseProgram(Program);
+      olDestroyProgram(Program);
     }
     RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::TearDown());
   }
@@ -101,7 +101,7 @@ struct OffloadKernelTest : OffloadProgramTest {
 
   void TearDown() override {
     if (Kernel) {
-      olReleaseKernel(Kernel);
+      olDestroyKernel(Kernel);
     }
     RETURN_ON_FATAL_FAILURE(OffloadProgramTest::TearDown());
   }
@@ -117,7 +117,7 @@ struct OffloadQueueTest : OffloadDeviceTest {
 
   void TearDown() override {
     if (Queue) {
-      olReleaseQueue(Queue);
+      olDestroyQueue(Queue);
     }
     RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::TearDown());
   }
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueKernelLaunch.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueKernelLaunch.cpp
index d185cc5831f2b..a70f167482dfa 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueKernelLaunch.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueKernelLaunch.cpp
@@ -22,10 +22,10 @@ struct olEnqueueKernelLaunchTest : OffloadQueueTest {
 
   void TearDown() override {
     if (Kernel) {
-      olReleaseKernel(Kernel);
+      olDestroyKernel(Kernel);
     }
     if (Program) {
-      olReleaseProgram(Program);
+      olDestroyProgram(Program);
     }
     RETURN_ON_FATAL_FAILURE(OffloadQueueTest::TearDown());
   }
diff --git a/offload/unittests/OffloadAPI/kernel/olCreateKernel.cpp b/offload/unittests/OffloadAPI/kernel/olCreateKernel.cpp
index 87879e6b1d375..5a6405cb22be1 100644
--- a/offload/unittests/OffloadAPI/kernel/olCreateKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olCreateKernel.cpp
@@ -16,7 +16,7 @@ TEST_F(olCreateKernelTest, Success) {
   ol_kernel_handle_t Kernel = nullptr;
   ASSERT_SUCCESS(olCreateKernel(Program, "foo", &Kernel));
   ASSERT_NE(Kernel, nullptr);
-  ASSERT_SUCCESS(olReleaseKernel(Kernel));
+  ASSERT_SUCCESS(olDestroyKernel(Kernel));
 }
 
 TEST_F(olCreateKernelTest, InvalidNullProgram) {
diff --git a/offload/unittests/OffloadAPI/kernel/olReleaseKernel.cpp b/offload/unittests/OffloadAPI/kernel/olDestroyKernel.cpp
similarity index 62%
rename from offload/unittests/OffloadAPI/kernel/olReleaseKernel.cpp
rename to offload/unittests/OffloadAPI/kernel/olDestroyKernel.cpp
index 8229253bbbb0f..7bb26180207fa 100644
--- a/offload/unittests/OffloadAPI/kernel/olReleaseKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olDestroyKernel.cpp
@@ -10,13 +10,13 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-using olReleaseKernelTest = OffloadKernelTest;
+using olDestroyKernelTest = OffloadKernelTest;
 
-TEST_F(olReleaseKernelTest, Success) {
-  ASSERT_SUCCESS(olRetainKernel(Kernel));
-  ASSERT_SUCCESS(olReleaseKernel(Kernel));
+TEST_F(olDestroyKernelTest, Success) {
+  ASSERT_SUCCESS(olDestroyKernel(Kernel));
+  Kernel = nullptr;
 }
 
-TEST_F(olReleaseKernelTest, InvalidNullHandle) {
-  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olReleaseKernel(nullptr));
+TEST_F(olDestroyKernelTest, InvalidNullHandle) {
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olDestroyKernel(nullptr));
 }
diff --git a/offload/unittests/OffloadAPI/program/olCreateProgram.cpp b/offload/unittests/OffloadAPI/program/olCreateProgram.cpp
index c28b8db287252..bef2a16c6e10e 100644
--- a/offload/unittests/OffloadAPI/program/olCreateProgram.cpp
+++ b/offload/unittests/OffloadAPI/program/olCreateProgram.cpp
@@ -23,5 +23,5 @@ TEST_F(olCreateProgramTest, Success) {
                                  DeviceBin->getBufferSize(), &Program));
   ASSERT_NE(Program, nullptr);
 
-  ASSERT_SUCCESS(olReleaseProgram(Program));
+  ASSERT_SUCCESS(olDestroyProgram(Program));
 }
diff --git a/offload/unittests/OffloadAPI/program/olReleaseProgram.cpp b/offload/unittests/OffloadAPI/program/olDestroyProgram.cpp
similarity index 61%
rename from offload/unittests/OffloadAPI/program/olReleaseProgram.cpp
rename to offload/unittests/OffloadAPI/program/olDestroyProgram.cpp
index 01a326b1ef5fe..0a59411e2a57a 100644
--- a/offload/unittests/OffloadAPI/program/olReleaseProgram.cpp
+++ b/offload/unittests/OffloadAPI/program/olDestroyProgram.cpp
@@ -10,13 +10,13 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-using olReleaseProgramTest = OffloadProgramTest;
+using olDestroyProgramTest = OffloadProgramTest;
 
-TEST_F(olReleaseProgramTest, Success) {
-  ASSERT_SUCCESS(olRetainProgram(Program));
-  ASSERT_SUCCESS(olReleaseProgram(Program));
+TEST_F(olDestroyProgramTest, Success) {
+  ASSERT_SUCCESS(olDestroyProgram(Program));
+  Program = nullptr;
 }
 
-TEST_F(olReleaseProgramTest, InvalidNullHandle) {
-  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olReleaseProgram(nullptr));
+TEST_F(olDestroyProgramTest, InvalidNullHandle) {
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olDestroyProgram(nullptr));
 }
diff --git a/offload/unittests/OffloadAPI/program/olRetainProgram.cpp b/offload/unittests/OffloadAPI/program/olRetainProgram.cpp
deleted file mode 100644
index 9cd9cc9114258..0000000000000
--- a/offload/unittests/OffloadAPI/program/olRetainProgram.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===------- Offload API tests - olRetainProgram --------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "../common/Fixtures.hpp"
-#include <OffloadAPI.h>
-#include <gtest/gtest.h>
-
-using olRetainProgramTest = OffloadProgramTest;
-
-TEST_F(olRetainProgramTest, Success) {
-  ASSERT_SUCCESS(olRetainProgram(Program));
-}
-
-TEST_F(olRetainProgramTest, InvalidNullHandle) {
-  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olRetainProgram(nullptr));
-}
diff --git a/offload/unittests/OffloadAPI/kernel/olRetainKernel.cpp b/offload/unittests/OffloadAPI/queue/olDestroyQueue.cpp
similarity index 55%
rename from offload/unittests/OffloadAPI/kernel/olRetainKernel.cpp
rename to offload/unittests/OffloadAPI/queue/olDestroyQueue.cpp
index 5bd4456fa4fbc..b54694e0c798c 100644
--- a/offload/unittests/OffloadAPI/kernel/olRetainKernel.cpp
+++ b/offload/unittests/OffloadAPI/queue/olDestroyQueue.cpp
@@ -1,4 +1,4 @@
-//===------- Offload API tests - olRetainKernel ---------------------------===//
+//===------- Offload API tests - olDestroyQueue ---------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -10,10 +10,13 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-using olRetainKernelTest = OffloadKernelTest;
+using olDestroyQueueTest = OffloadQueueTest;
 
-TEST_F(olRetainKernelTest, Success) { ASSERT_SUCCESS(olRetainKernel(Kernel)); }
+TEST_F(olDestroyQueueTest, Success) {
+  ASSERT_SUCCESS(olDestroyQueue(Queue));
+  Queue = nullptr;
+}
 
-TEST_F(olRetainKernelTest, InvalidNullHandle) {
-  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olRetainKernel(nullptr));
+TEST_F(olDestroyQueueTest, InvalidNullHandle) {
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olDestroyQueue(nullptr));
 }
diff --git a/offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp b/offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp
deleted file mode 100644
index 723a73288acda..0000000000000
--- a/offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===------- Offload API tests - olReleaseQueue ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "../common/Fixtures.hpp"
-#include <OffloadAPI.h>
-#include <gtest/gtest.h>
-
-using olReleaseQueueTest = OffloadQueueTest;
-
-// TODO: When we can fetch queue info we can check the reference count is
-// changing in an expected way. In the meantime just check the entry point
-// doesn't blow up.
-TEST_F(olReleaseQueueTest, Success) {
-  ASSERT_SUCCESS(olRetainQueue(Queue));
-  ASSERT_SUCCESS(olReleaseQueue(Queue));
-}
diff --git a/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp b/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp
deleted file mode 100644
index c6fb812b28151..0000000000000
--- a/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//===------- Offload API tests - olRetainQueue ----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "../common/Fixtures.hpp"
-#include <OffloadAPI.h>
-#include <gtest/gtest.h>
-
-using olRetainQueueTest = OffloadQueueTest;
-
-// TODO: When we can fetch queue info we can check the reference count is
-// changing in the expected way. In the meantime just check the entry point
-// doesn't blow up.
-TEST_F(olRetainQueueTest, Success) { ASSERT_SUCCESS(olRetainQueue(Queue)); }

>From 44fb1e7d2a3a00acf82ee4c776bab7007806c581 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Tue, 25 Mar 2025 14:06:01 +0000
Subject: [PATCH 3/9] Rename olEnqueue functions

---
 offload/liboffload/API/Enqueue.td             |  4 +-
 .../liboffload/include/generated/OffloadAPI.h | 29 +++----
 .../include/generated/OffloadEntryPoints.inc  | 86 +++++++++----------
 .../include/generated/OffloadFuncs.inc        |  8 +-
 .../generated/OffloadImplFuncDecls.inc        | 17 ++--
 .../include/generated/OffloadPrint.hpp        |  7 +-
 offload/liboffload/src/OffloadImpl.cpp        | 17 ++--
 offload/unittests/OffloadAPI/CMakeLists.txt   |  4 +-
 ...eueKernelLaunch.cpp => olLaunchKernel.cpp} | 10 +--
 .../{olEnqueueMemcpy.cpp => olMemcpy.cpp}     | 30 ++++---
 10 files changed, 105 insertions(+), 107 deletions(-)
 rename offload/unittests/OffloadAPI/enqueue/{olEnqueueKernelLaunch.cpp => olLaunchKernel.cpp} (84%)
 rename offload/unittests/OffloadAPI/enqueue/{olEnqueueMemcpy.cpp => olMemcpy.cpp} (66%)

diff --git a/offload/liboffload/API/Enqueue.td b/offload/liboffload/API/Enqueue.td
index c6f1c5ddc4b51..a2e9efd31299c 100644
--- a/offload/liboffload/API/Enqueue.td
+++ b/offload/liboffload/API/Enqueue.td
@@ -11,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 def : Function {
-    let name = "olEnqueueMemcpy";
+    let name = "olMemcpy";
     let desc = "Enqueue a memcpy operation.";
     let details = [
         "For host pointers, use the device returned by olGetHostDevice",
@@ -46,7 +46,7 @@ def : Struct {
 }
 
 def : Function {
-    let name = "olEnqueueKernelLaunch";
+    let name = "olLaunchKernel";
     let desc = "Enqueue a kernel launch with the specified size and parameters.";
     let details = [];
     let params = [
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 7d2b90cc27a31..bb57da4e95de9 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -656,7 +656,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
 ///         + `NULL == DstPtr`
 ///         + `NULL == SrcPtr`
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpy(
+OL_APIEXPORT ol_result_t OL_APICALL olMemcpy(
     // [in] handle of the queue
     ol_queue_handle_t Queue,
     // [in] pointer to copy to
@@ -699,7 +699,7 @@ typedef struct ol_kernel_launch_size_args_t {
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
 ///         + `NULL == ArgumentsData`
 ///         + `NULL == LaunchSizeArgs`
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
+OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
     // [in] handle of the queue
     ol_queue_handle_t Queue,
     // [in] handle of the kernel
@@ -926,9 +926,9 @@ typedef struct ol_wait_event_params_t {
 } ol_wait_event_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olEnqueueMemcpy
+/// @brief Function parameters for olMemcpy
 /// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_enqueue_memcpy_params_t {
+typedef struct ol_memcpy_params_t {
   ol_queue_handle_t *pQueue;
   void **pDstPtr;
   ol_device_handle_t *pDstDevice;
@@ -936,19 +936,19 @@ typedef struct ol_enqueue_memcpy_params_t {
   ol_device_handle_t *pSrcDevice;
   size_t *pSize;
   ol_event_handle_t **pEventOut;
-} ol_enqueue_memcpy_params_t;
+} ol_memcpy_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olEnqueueKernelLaunch
+/// @brief Function parameters for olLaunchKernel
 /// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_enqueue_kernel_launch_params_t {
+typedef struct ol_launch_kernel_params_t {
   ol_queue_handle_t *pQueue;
   ol_kernel_handle_t *pKernel;
   const void **pArgumentsData;
   size_t *pArgumentsSize;
   const ol_kernel_launch_size_args_t **pLaunchSizeArgs;
   ol_event_handle_t **pEventOut;
-} ol_enqueue_kernel_launch_params_t;
+} ol_launch_kernel_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olCreateProgram
@@ -1116,19 +1116,18 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitEventWithCodeLoc(
     ol_event_handle_t Event, ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olEnqueueMemcpy that also sets source code location
-/// information
-/// @details See also ::olEnqueueMemcpy
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpyWithCodeLoc(
+/// @brief Variant of olMemcpy that also sets source code location information
+/// @details See also ::olMemcpy
+OL_APIEXPORT ol_result_t OL_APICALL olMemcpyWithCodeLoc(
     ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice,
     void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size,
     ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olEnqueueKernelLaunch that also sets source code location
+/// @brief Variant of olLaunchKernel that also sets source code location
 /// information
-/// @details See also ::olEnqueueKernelLaunch
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunchWithCodeLoc(
+/// @details See also ::olLaunchKernel
+OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernelWithCodeLoc(
     ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
     const void *ArgumentsData, size_t ArgumentsSize,
     const ol_kernel_launch_size_args_t *LaunchSizeArgs,
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 872f551edcb74..1c84153c39454 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -762,10 +762,10 @@ ol_result_t olWaitEventWithCodeLoc(ol_event_handle_t Event,
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olEnqueueMemcpy_val(ol_queue_handle_t Queue, void *DstPtr,
-                                     ol_device_handle_t DstDevice, void *SrcPtr,
-                                     ol_device_handle_t SrcDevice, size_t Size,
-                                     ol_event_handle_t *EventOut) {
+ol_impl_result_t olMemcpy_val(ol_queue_handle_t Queue, void *DstPtr,
+                              ol_device_handle_t DstDevice, void *SrcPtr,
+                              ol_device_handle_t SrcDevice, size_t Size,
+                              ol_event_handle_t *EventOut) {
   if (offloadConfig().ValidationEnabled) {
     if (Size == 0) {
       return OL_ERRC_INVALID_SIZE;
@@ -792,23 +792,23 @@ ol_impl_result_t olEnqueueMemcpy_val(ol_queue_handle_t Queue, void *DstPtr,
     }
   }
 
-  return llvm::offload::olEnqueueMemcpy_impl(Queue, DstPtr, DstDevice, SrcPtr,
-                                             SrcDevice, Size, EventOut);
+  return llvm::offload::olMemcpy_impl(Queue, DstPtr, DstDevice, SrcPtr,
+                                      SrcDevice, Size, EventOut);
 }
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpy(
-    ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice,
-    void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size,
-    ol_event_handle_t *EventOut) {
+OL_APIEXPORT ol_result_t OL_APICALL
+olMemcpy(ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice,
+         void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size,
+         ol_event_handle_t *EventOut) {
   if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olEnqueueMemcpy";
+    llvm::errs() << "---> olMemcpy";
   }
 
-  ol_result_t Result = olEnqueueMemcpy_val(Queue, DstPtr, DstDevice, SrcPtr,
-                                           SrcDevice, Size, EventOut);
+  ol_result_t Result =
+      olMemcpy_val(Queue, DstPtr, DstDevice, SrcPtr, SrcDevice, Size, EventOut);
 
   if (offloadConfig().TracingEnabled) {
-    ol_enqueue_memcpy_params_t Params = {
-        &Queue, &DstPtr, &DstDevice, &SrcPtr, &SrcDevice, &Size, &EventOut};
+    ol_memcpy_params_t Params = {&Queue,     &DstPtr, &DstDevice, &SrcPtr,
+                                 &SrcDevice, &Size,   &EventOut};
     llvm::errs() << "(" << &Params << ")";
     llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
@@ -817,15 +817,14 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpy(
   }
   return Result;
 }
-ol_result_t olEnqueueMemcpyWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
-                                       ol_device_handle_t DstDevice,
-                                       void *SrcPtr,
-                                       ol_device_handle_t SrcDevice,
-                                       size_t Size, ol_event_handle_t *EventOut,
-                                       ol_code_location_t *CodeLocation) {
+ol_result_t olMemcpyWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
+                                ol_device_handle_t DstDevice, void *SrcPtr,
+                                ol_device_handle_t SrcDevice, size_t Size,
+                                ol_event_handle_t *EventOut,
+                                ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olEnqueueMemcpy(Queue, DstPtr, DstDevice, SrcPtr,
-                                         SrcDevice, Size, EventOut);
+  ol_result_t Result =
+      ::olMemcpy(Queue, DstPtr, DstDevice, SrcPtr, SrcDevice, Size, EventOut);
 
   currentCodeLocation() = nullptr;
   return Result;
@@ -833,10 +832,10 @@ ol_result_t olEnqueueMemcpyWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
 
 ///////////////////////////////////////////////////////////////////////////////
 ol_impl_result_t
-olEnqueueKernelLaunch_val(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
-                          const void *ArgumentsData, size_t ArgumentsSize,
-                          const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-                          ol_event_handle_t *EventOut) {
+olLaunchKernel_val(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+                   const void *ArgumentsData, size_t ArgumentsSize,
+                   const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+                   ol_event_handle_t *EventOut) {
   if (offloadConfig().ValidationEnabled) {
     if (NULL == Queue) {
       return OL_ERRC_INVALID_NULL_HANDLE;
@@ -855,25 +854,25 @@ olEnqueueKernelLaunch_val(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
     }
   }
 
-  return llvm::offload::olEnqueueKernelLaunch_impl(
+  return llvm::offload::olLaunchKernel_impl(
       Queue, Kernel, ArgumentsData, ArgumentsSize, LaunchSizeArgs, EventOut);
 }
 OL_APIEXPORT ol_result_t OL_APICALL
-olEnqueueKernelLaunch(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
-                      const void *ArgumentsData, size_t ArgumentsSize,
-                      const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-                      ol_event_handle_t *EventOut) {
+olLaunchKernel(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+               const void *ArgumentsData, size_t ArgumentsSize,
+               const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+               ol_event_handle_t *EventOut) {
   if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olEnqueueKernelLaunch";
+    llvm::errs() << "---> olLaunchKernel";
   }
 
-  ol_result_t Result = olEnqueueKernelLaunch_val(
+  ol_result_t Result = olLaunchKernel_val(
       Queue, Kernel, ArgumentsData, ArgumentsSize, LaunchSizeArgs, EventOut);
 
   if (offloadConfig().TracingEnabled) {
-    ol_enqueue_kernel_launch_params_t Params = {&Queue,          &Kernel,
-                                                &ArgumentsData,  &ArgumentsSize,
-                                                &LaunchSizeArgs, &EventOut};
+    ol_launch_kernel_params_t Params = {&Queue,          &Kernel,
+                                        &ArgumentsData,  &ArgumentsSize,
+                                        &LaunchSizeArgs, &EventOut};
     llvm::errs() << "(" << &Params << ")";
     llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
@@ -882,13 +881,14 @@ olEnqueueKernelLaunch(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
   }
   return Result;
 }
-ol_result_t olEnqueueKernelLaunchWithCodeLoc(
-    ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
-    const void *ArgumentsData, size_t ArgumentsSize,
-    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation) {
+ol_result_t
+olLaunchKernelWithCodeLoc(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+                          const void *ArgumentsData, size_t ArgumentsSize,
+                          const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+                          ol_event_handle_t *EventOut,
+                          ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olEnqueueKernelLaunch(
+  ol_result_t Result = ::olLaunchKernel(
       Queue, Kernel, ArgumentsData, ArgumentsSize, LaunchSizeArgs, EventOut);
 
   currentCodeLocation() = nullptr;
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index 46f935e551454..d8fc11a4267fe 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -28,8 +28,8 @@ OFFLOAD_FUNC(olDestroyQueue)
 OFFLOAD_FUNC(olWaitQueue)
 OFFLOAD_FUNC(olDestroyEvent)
 OFFLOAD_FUNC(olWaitEvent)
-OFFLOAD_FUNC(olEnqueueMemcpy)
-OFFLOAD_FUNC(olEnqueueKernelLaunch)
+OFFLOAD_FUNC(olMemcpy)
+OFFLOAD_FUNC(olLaunchKernel)
 OFFLOAD_FUNC(olCreateProgram)
 OFFLOAD_FUNC(olDestroyProgram)
 OFFLOAD_FUNC(olCreateKernel)
@@ -52,8 +52,8 @@ OFFLOAD_FUNC(olDestroyQueueWithCodeLoc)
 OFFLOAD_FUNC(olWaitQueueWithCodeLoc)
 OFFLOAD_FUNC(olDestroyEventWithCodeLoc)
 OFFLOAD_FUNC(olWaitEventWithCodeLoc)
-OFFLOAD_FUNC(olEnqueueMemcpyWithCodeLoc)
-OFFLOAD_FUNC(olEnqueueKernelLaunchWithCodeLoc)
+OFFLOAD_FUNC(olMemcpyWithCodeLoc)
+OFFLOAD_FUNC(olLaunchKernelWithCodeLoc)
 OFFLOAD_FUNC(olCreateProgramWithCodeLoc)
 OFFLOAD_FUNC(olDestroyProgramWithCodeLoc)
 OFFLOAD_FUNC(olCreateKernelWithCodeLoc)
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 085d7fb7bf674..07f546b0c23c8 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -57,17 +57,16 @@ ol_impl_result_t olDestroyEvent_impl(ol_event_handle_t Event);
 
 ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event);
 
-ol_impl_result_t olEnqueueMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
-                                      ol_device_handle_t DstDevice,
-                                      void *SrcPtr,
-                                      ol_device_handle_t SrcDevice, size_t Size,
-                                      ol_event_handle_t *EventOut);
+ol_impl_result_t olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
+                               ol_device_handle_t DstDevice, void *SrcPtr,
+                               ol_device_handle_t SrcDevice, size_t Size,
+                               ol_event_handle_t *EventOut);
 
 ol_impl_result_t
-olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
-                           const void *ArgumentsData, size_t ArgumentsSize,
-                           const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-                           ol_event_handle_t *EventOut);
+olLaunchKernel_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+                    const void *ArgumentsData, size_t ArgumentsSize,
+                    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+                    ol_event_handle_t *EventOut);
 
 ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device,
                                       const void *ProgData, size_t ProgDataSize,
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index dd0dd466bd7d7..0fe6a2b60e076 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -564,9 +564,8 @@ operator<<(llvm::raw_ostream &os, const struct ol_wait_event_params_t *params) {
   return os;
 }
 
-inline llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os,
-           const struct ol_enqueue_memcpy_params_t *params) {
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     const struct ol_memcpy_params_t *params) {
   os << ".Queue = ";
   printPtr(os, *params->pQueue);
   os << ", ";
@@ -592,7 +591,7 @@ operator<<(llvm::raw_ostream &os,
 
 inline llvm::raw_ostream &
 operator<<(llvm::raw_ostream &os,
-           const struct ol_enqueue_kernel_launch_params_t *params) {
+           const struct ol_launch_kernel_params_t *params) {
   os << ".Queue = ";
   printPtr(os, *params->pQueue);
   os << ", ";
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 2266a7fa457c7..4cdc0c8db9f14 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -398,11 +398,10 @@ ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
   return EventImpl.release();
 }
 
-ol_impl_result_t olEnqueueMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
-                                      ol_device_handle_t DstDevice,
-                                      void *SrcPtr,
-                                      ol_device_handle_t SrcDevice, size_t Size,
-                                      ol_event_handle_t *EventOut) {
+ol_impl_result_t olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
+                               ol_device_handle_t DstDevice, void *SrcPtr,
+                               ol_device_handle_t SrcDevice, size_t Size,
+                               ol_event_handle_t *EventOut) {
   if (DstDevice == HostDevice() && SrcDevice == HostDevice()) {
     // TODO: We could actually handle this with a plain memcpy but we currently
     // have no way of synchronizing this with the queue
@@ -491,10 +490,10 @@ ol_impl_result_t olDestroyKernel_impl(ol_kernel_handle_t Kernel) {
 }
 
 ol_impl_result_t
-olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
-                           const void *ArgumentsData, size_t ArgumentsSize,
-                           const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-                           ol_event_handle_t *EventOut) {
+olLaunchKernel_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+                    const void *ArgumentsData, size_t ArgumentsSize,
+                    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+                    ol_event_handle_t *EventOut) {
   auto *DeviceImpl = Queue->Device->Device;
 
   AsyncInfoWrapperTy AsyncInfoWrapper(*DeviceImpl, Queue->AsyncInfo);
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index d17ccc70fef36..0da0d7da56cc3 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -19,8 +19,8 @@ add_libompt_unittest("offload.unittests"
     ${CMAKE_CURRENT_SOURCE_DIR}/queue/olDestroyQueue.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemAlloc.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemFree.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueMemcpy.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueKernelLaunch.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olMemcpy.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olLaunchKernel.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/program/olCreateProgram.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/program/olDestroyProgram.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olCreateKernel.cpp
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueKernelLaunch.cpp b/offload/unittests/OffloadAPI/enqueue/olLaunchKernel.cpp
similarity index 84%
rename from offload/unittests/OffloadAPI/enqueue/olEnqueueKernelLaunch.cpp
rename to offload/unittests/OffloadAPI/enqueue/olLaunchKernel.cpp
index a70f167482dfa..be6b08e1f5a8b 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueKernelLaunch.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olLaunchKernel.cpp
@@ -1,4 +1,4 @@
-//===------- Offload API tests - olEnqueueKernelLaunch --------------------===//
+//===------- Offload API tests - olLaunchKernel ---------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -10,7 +10,7 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-struct olEnqueueKernelLaunchTest : OffloadQueueTest {
+struct olLaunchKernelTest : OffloadQueueTest {
   void SetUp() override {
     RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
     ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Platform, DeviceBin));
@@ -35,7 +35,7 @@ struct olEnqueueKernelLaunchTest : OffloadQueueTest {
   ol_kernel_handle_t Kernel = nullptr;
 };
 
-TEST_F(olEnqueueKernelLaunchTest, Success) {
+TEST_F(olLaunchKernelTest, Success) {
   void *Mem;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 64, &Mem));
   ol_kernel_launch_size_args_t LaunchArgs{};
@@ -52,8 +52,8 @@ TEST_F(olEnqueueKernelLaunchTest, Success) {
     void *Mem;
   } Args{Mem};
 
-  ASSERT_SUCCESS(olEnqueueKernelLaunch(Queue, Kernel, &Args, sizeof(Args),
-                                       &LaunchArgs, nullptr));
+  ASSERT_SUCCESS(
+      olLaunchKernel(Queue, Kernel, &Args, sizeof(Args), &LaunchArgs, nullptr));
 
   ASSERT_SUCCESS(olWaitQueue(Queue));
 
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueMemcpy.cpp b/offload/unittests/OffloadAPI/enqueue/olMemcpy.cpp
similarity index 66%
rename from offload/unittests/OffloadAPI/enqueue/olEnqueueMemcpy.cpp
rename to offload/unittests/OffloadAPI/enqueue/olMemcpy.cpp
index c43af8b4ae81d..c28d5e0a35137 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueMemcpy.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olMemcpy.cpp
@@ -1,4 +1,4 @@
-//===------- Offload API tests - olEnqueueMemcpy --------------------------===//
+//===------- Offload API tests - olMemcpy ---------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -10,9 +10,9 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-using olEnqueueMemcpyTest = OffloadQueueTest;
+using olMemcpyTest = OffloadQueueTest;
 
-TEST_F(olEnqueueMemcpyTest, SuccessHtoD) {
+TEST_F(olMemcpyTest, SuccessHtoD) {
   constexpr size_t Size = 1024;
   void *Alloc;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
@@ -20,12 +20,12 @@ TEST_F(olEnqueueMemcpyTest, SuccessHtoD) {
   ol_device_handle_t Host;
   ASSERT_SUCCESS(olGetHostDevice(&Host));
   ASSERT_SUCCESS(
-      olEnqueueMemcpy(Queue, Alloc, Device, Input.data(), Host, Size, nullptr));
+      olMemcpy(Queue, Alloc, Device, Input.data(), Host, Size, nullptr));
   olWaitQueue(Queue);
   olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc);
 }
 
-TEST_F(olEnqueueMemcpyTest, SuccessDtoH) {
+TEST_F(olMemcpyTest, SuccessDtoH) {
   constexpr size_t Size = 1024;
   void *Alloc;
   std::vector<uint8_t> Input(Size, 42);
@@ -35,9 +35,9 @@ TEST_F(olEnqueueMemcpyTest, SuccessDtoH) {
 
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
   ASSERT_SUCCESS(
-      olEnqueueMemcpy(Queue, Alloc, Device, Input.data(), Host, Size, nullptr));
-  ASSERT_SUCCESS(olEnqueueMemcpy(Queue, Output.data(), Host, Alloc, Device,
-                                 Size, nullptr));
+      olMemcpy(Queue, Alloc, Device, Input.data(), Host, Size, nullptr));
+  ASSERT_SUCCESS(
+      olMemcpy(Queue, Output.data(), Host, Alloc, Device, Size, nullptr));
   ASSERT_SUCCESS(olWaitQueue(Queue));
   for (uint8_t Val : Output) {
     ASSERT_EQ(Val, 42);
@@ -45,7 +45,7 @@ TEST_F(olEnqueueMemcpyTest, SuccessDtoH) {
   ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
 }
 
-TEST_F(olEnqueueMemcpyTest, SuccessDtoD) {
+TEST_F(olMemcpyTest, SuccessDtoD) {
   constexpr size_t Size = 1024;
   void *AllocA;
   void *AllocB;
@@ -56,12 +56,12 @@ TEST_F(olEnqueueMemcpyTest, SuccessDtoD) {
 
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocA));
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocB));
-  ASSERT_SUCCESS(olEnqueueMemcpy(Queue, AllocA, Device, Input.data(), Host,
-                                 Size, nullptr));
   ASSERT_SUCCESS(
-      olEnqueueMemcpy(Queue, AllocB, Device, AllocA, Device, Size, nullptr));
-  ASSERT_SUCCESS(olEnqueueMemcpy(Queue, Output.data(), Host, AllocB, Device,
-                                 Size, nullptr));
+      olMemcpy(Queue, AllocA, Device, Input.data(), Host, Size, nullptr));
+  ASSERT_SUCCESS(
+      olMemcpy(Queue, AllocB, Device, AllocA, Device, Size, nullptr));
+  ASSERT_SUCCESS(
+      olMemcpy(Queue, Output.data(), Host, AllocB, Device, Size, nullptr));
   ASSERT_SUCCESS(olWaitQueue(Queue));
   for (uint8_t Val : Output) {
     ASSERT_EQ(Val, 42);
@@ -69,3 +69,5 @@ TEST_F(olEnqueueMemcpyTest, SuccessDtoD) {
   ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, AllocA));
   ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, AllocB));
 }
+
+TEST_F(olMemcpyTest, SuccessSizeZero) {}
\ No newline at end of file

>From f751a4399ff30ef41ed8354a7a4d45fda7af911b Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Tue, 25 Mar 2025 16:43:00 +0000
Subject: [PATCH 4/9] Make queue arguments optional for memcpy and kernel
 launch

---
 offload/liboffload/API/Enqueue.td             | 19 ++++--
 .../liboffload/include/generated/OffloadAPI.h | 26 +++++---
 .../include/generated/OffloadEntryPoints.inc  | 61 ++++++++++---------
 .../generated/OffloadImplFuncDecls.inc        |  5 +-
 .../include/generated/OffloadPrint.hpp        |  3 +
 offload/liboffload/src/OffloadImpl.cpp        | 38 +++++++-----
 offload/unittests/OffloadAPI/CMakeLists.txt   |  4 +-
 .../{enqueue => kernel}/olLaunchKernel.cpp    | 42 +++++++++----
 .../{enqueue => memory}/olMemcpy.cpp          | 47 +++++++++++++-
 9 files changed, 170 insertions(+), 75 deletions(-)
 rename offload/unittests/OffloadAPI/{enqueue => kernel}/olLaunchKernel.cpp (67%)
 rename offload/unittests/OffloadAPI/{enqueue => memory}/olMemcpy.cpp (63%)

diff --git a/offload/liboffload/API/Enqueue.td b/offload/liboffload/API/Enqueue.td
index a2e9efd31299c..1936c2fffe021 100644
--- a/offload/liboffload/API/Enqueue.td
+++ b/offload/liboffload/API/Enqueue.td
@@ -15,10 +15,11 @@ def : Function {
     let desc = "Enqueue a memcpy operation.";
     let details = [
         "For host pointers, use the device returned by olGetHostDevice",
-        "At least one device must be a non-host device"
+        "If a queue is specified, at least one device must be a non-host device",
+        "If a queue is not specified, the memcpy happens synchronously"
     ];
     let params = [
-        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue.", PARAM_IN_OPTIONAL>,
         Param<"void*", "DstPtr", "pointer to copy to", PARAM_IN>,
         Param<"ol_device_handle_t", "DstDevice", "device that DstPtr belongs to", PARAM_IN>,
         Param<"void*", "SrcPtr", "pointer to copy from", PARAM_IN>,
@@ -27,7 +28,7 @@ def : Function {
         Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
     ];
     let returns = [
-        Return<"OL_ERRC_INVALID_SIZE", ["`Size == 0`"]>
+        Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>
     ];
 }
 
@@ -48,14 +49,20 @@ def : Struct {
 def : Function {
     let name = "olLaunchKernel";
     let desc = "Enqueue a kernel launch with the specified size and parameters.";
-    let details = [];
+    let details = [
+        "If a queue is not specified, kernel execution happens synchronously"
+    ];
     let params = [
-        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN_OPTIONAL>,
+        Param<"ol_device_handle_t", "Device", "handle of the device to execute on", PARAM_IN>,
         Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
         Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN>,
         Param<"size_t", "ArgumentsSize", "size of the kernel argument struct", PARAM_IN>,
         Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>,
         Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
     ];
-    let returns = [];
+    let returns = [
+        Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>,
+        Return<"OL_ERRC_INVALID_DEVICE", ["If Queue is non-null but does not belong to Device"]>,
+    ];
 }
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index bb57da4e95de9..236ec438d524b 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -641,23 +641,23 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(
 ///
 /// @details
 ///    - For host pointers, use the device returned by olGetHostDevice
-///    - At least one device must be a non-host device
+///    - If a queue is specified, at least one device must be a non-host device
+///    - If a queue is not specified, the memcpy happens synchronously
 ///
 /// @returns
 ///     - ::OL_RESULT_SUCCESS
 ///     - ::OL_ERRC_UNINITIALIZED
 ///     - ::OL_ERRC_DEVICE_LOST
-///     - ::OL_ERRC_INVALID_SIZE
-///         + `Size == 0`
+///     - ::OL_ERRC_INVALID_ARGUMENT
+///         + `Queue == NULL && EventOut != NULL`
 ///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == Queue`
 ///         + `NULL == DstDevice`
 ///         + `NULL == SrcDevice`
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
 ///         + `NULL == DstPtr`
 ///         + `NULL == SrcPtr`
 OL_APIEXPORT ol_result_t OL_APICALL olMemcpy(
-    // [in] handle of the queue
+    // [in][optional] handle of the queue.
     ol_queue_handle_t Queue,
     // [in] pointer to copy to
     void *DstPtr,
@@ -688,20 +688,27 @@ typedef struct ol_kernel_launch_size_args_t {
 /// @brief Enqueue a kernel launch with the specified size and parameters.
 ///
 /// @details
+///    - If a queue is not specified, kernel execution happens synchronously
 ///
 /// @returns
 ///     - ::OL_RESULT_SUCCESS
 ///     - ::OL_ERRC_UNINITIALIZED
 ///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_ARGUMENT
+///         + `Queue == NULL && EventOut != NULL`
+///     - ::OL_ERRC_INVALID_DEVICE
+///         + If Queue is non-null but does not belong to Device
 ///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == Queue`
+///         + `NULL == Device`
 ///         + `NULL == Kernel`
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
 ///         + `NULL == ArgumentsData`
 ///         + `NULL == LaunchSizeArgs`
 OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
-    // [in] handle of the queue
+    // [in][optional] handle of the queue
     ol_queue_handle_t Queue,
+    // [in] handle of the device to execute on
+    ol_device_handle_t Device,
     // [in] handle of the kernel
     ol_kernel_handle_t Kernel,
     // [in] pointer to the kernel argument struct
@@ -943,6 +950,7 @@ typedef struct ol_memcpy_params_t {
 /// @details Each entry is a pointer to the parameter passed to the function;
 typedef struct ol_launch_kernel_params_t {
   ol_queue_handle_t *pQueue;
+  ol_device_handle_t *pDevice;
   ol_kernel_handle_t *pKernel;
   const void **pArgumentsData;
   size_t *pArgumentsSize;
@@ -1128,8 +1136,8 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemcpyWithCodeLoc(
 /// information
 /// @details See also ::olLaunchKernel
 OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernelWithCodeLoc(
-    ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
-    const void *ArgumentsData, size_t ArgumentsSize,
+    ol_queue_handle_t Queue, ol_device_handle_t Device,
+    ol_kernel_handle_t Kernel, const void *ArgumentsData, size_t ArgumentsSize,
     const ol_kernel_launch_size_args_t *LaunchSizeArgs,
     ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
 
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 1c84153c39454..92dc3171b21ba 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -767,12 +767,8 @@ ol_impl_result_t olMemcpy_val(ol_queue_handle_t Queue, void *DstPtr,
                               ol_device_handle_t SrcDevice, size_t Size,
                               ol_event_handle_t *EventOut) {
   if (offloadConfig().ValidationEnabled) {
-    if (Size == 0) {
-      return OL_ERRC_INVALID_SIZE;
-    }
-
-    if (NULL == Queue) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
+    if (Queue == NULL && EventOut != NULL) {
+      return OL_ERRC_INVALID_ARGUMENT;
     }
 
     if (NULL == DstDevice) {
@@ -832,12 +828,17 @@ ol_result_t olMemcpyWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
 
 ///////////////////////////////////////////////////////////////////////////////
 ol_impl_result_t
-olLaunchKernel_val(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
-                   const void *ArgumentsData, size_t ArgumentsSize,
+olLaunchKernel_val(ol_queue_handle_t Queue, ol_device_handle_t Device,
+                   ol_kernel_handle_t Kernel, const void *ArgumentsData,
+                   size_t ArgumentsSize,
                    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
                    ol_event_handle_t *EventOut) {
   if (offloadConfig().ValidationEnabled) {
-    if (NULL == Queue) {
+    if (Queue == NULL && EventOut != NULL) {
+      return OL_ERRC_INVALID_ARGUMENT;
+    }
+
+    if (NULL == Device) {
       return OL_ERRC_INVALID_NULL_HANDLE;
     }
 
@@ -854,25 +855,27 @@ olLaunchKernel_val(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
     }
   }
 
-  return llvm::offload::olLaunchKernel_impl(
-      Queue, Kernel, ArgumentsData, ArgumentsSize, LaunchSizeArgs, EventOut);
+  return llvm::offload::olLaunchKernel_impl(Queue, Device, Kernel,
+                                            ArgumentsData, ArgumentsSize,
+                                            LaunchSizeArgs, EventOut);
 }
-OL_APIEXPORT ol_result_t OL_APICALL
-olLaunchKernel(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
-               const void *ArgumentsData, size_t ArgumentsSize,
-               const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-               ol_event_handle_t *EventOut) {
+OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
+    ol_queue_handle_t Queue, ol_device_handle_t Device,
+    ol_kernel_handle_t Kernel, const void *ArgumentsData, size_t ArgumentsSize,
+    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+    ol_event_handle_t *EventOut) {
   if (offloadConfig().TracingEnabled) {
     llvm::errs() << "---> olLaunchKernel";
   }
 
-  ol_result_t Result = olLaunchKernel_val(
-      Queue, Kernel, ArgumentsData, ArgumentsSize, LaunchSizeArgs, EventOut);
+  ol_result_t Result =
+      olLaunchKernel_val(Queue, Device, Kernel, ArgumentsData, ArgumentsSize,
+                         LaunchSizeArgs, EventOut);
 
   if (offloadConfig().TracingEnabled) {
-    ol_launch_kernel_params_t Params = {&Queue,          &Kernel,
-                                        &ArgumentsData,  &ArgumentsSize,
-                                        &LaunchSizeArgs, &EventOut};
+    ol_launch_kernel_params_t Params = {
+        &Queue,         &Device,         &Kernel,  &ArgumentsData,
+        &ArgumentsSize, &LaunchSizeArgs, &EventOut};
     llvm::errs() << "(" << &Params << ")";
     llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
@@ -881,15 +884,15 @@ olLaunchKernel(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
   }
   return Result;
 }
-ol_result_t
-olLaunchKernelWithCodeLoc(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
-                          const void *ArgumentsData, size_t ArgumentsSize,
-                          const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-                          ol_event_handle_t *EventOut,
-                          ol_code_location_t *CodeLocation) {
+ol_result_t olLaunchKernelWithCodeLoc(
+    ol_queue_handle_t Queue, ol_device_handle_t Device,
+    ol_kernel_handle_t Kernel, const void *ArgumentsData, size_t ArgumentsSize,
+    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olLaunchKernel(
-      Queue, Kernel, ArgumentsData, ArgumentsSize, LaunchSizeArgs, EventOut);
+  ol_result_t Result =
+      ::olLaunchKernel(Queue, Device, Kernel, ArgumentsData, ArgumentsSize,
+                       LaunchSizeArgs, EventOut);
 
   currentCodeLocation() = nullptr;
   return Result;
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 07f546b0c23c8..340eab9b0ec22 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -63,8 +63,9 @@ ol_impl_result_t olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
                                ol_event_handle_t *EventOut);
 
 ol_impl_result_t
-olLaunchKernel_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
-                    const void *ArgumentsData, size_t ArgumentsSize,
+olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
+                    ol_kernel_handle_t Kernel, const void *ArgumentsData,
+                    size_t ArgumentsSize,
                     const ol_kernel_launch_size_args_t *LaunchSizeArgs,
                     ol_event_handle_t *EventOut);
 
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 0fe6a2b60e076..d6bf470c18e70 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -595,6 +595,9 @@ operator<<(llvm::raw_ostream &os,
   os << ".Queue = ";
   printPtr(os, *params->pQueue);
   os << ", ";
+  os << ".Device = ";
+  printPtr(os, *params->pDevice);
+  os << ", ";
   os << ".Kernel = ";
   printPtr(os, *params->pKernel);
   os << ", ";
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 4cdc0c8db9f14..5d13d28b76bb7 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -403,25 +403,30 @@ ol_impl_result_t olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
                                ol_device_handle_t SrcDevice, size_t Size,
                                ol_event_handle_t *EventOut) {
   if (DstDevice == HostDevice() && SrcDevice == HostDevice()) {
-    // TODO: We could actually handle this with a plain memcpy but we currently
-    // have no way of synchronizing this with the queue
-    return {OL_ERRC_INVALID_ARGUMENT,
-            "One of DstDevice and SrcDevice must be a non-host device"};
+    if (!Queue) {
+      std::memcpy(DstPtr, SrcPtr, Size);
+      return OL_SUCCESS;
+    } else {
+      return {OL_ERRC_INVALID_ARGUMENT,
+              "One of DstDevice and SrcDevice must be a non-host device if "
+              "Queue is specified"};
+    }
   }
 
+  // If no queue is given the memcpy will be synchronous
+  auto QueueImpl = Queue ? Queue->AsyncInfo : nullptr;
+
   if (DstDevice == HostDevice()) {
-    auto Res =
-        SrcDevice->Device->dataRetrieve(DstPtr, SrcPtr, Size, Queue->AsyncInfo);
+    auto Res = SrcDevice->Device->dataRetrieve(DstPtr, SrcPtr, Size, QueueImpl);
     if (Res)
       return {OL_ERRC_UNKNOWN, "The data retrieve operation failed"};
   } else if (SrcDevice == HostDevice()) {
-    auto Res =
-        DstDevice->Device->dataSubmit(DstPtr, SrcPtr, Size, Queue->AsyncInfo);
+    auto Res = DstDevice->Device->dataSubmit(DstPtr, SrcPtr, Size, QueueImpl);
     if (Res)
       return {OL_ERRC_UNKNOWN, "The data submit operation failed"};
   } else {
     auto Res = SrcDevice->Device->dataExchange(SrcPtr, *DstDevice->Device,
-                                               DstPtr, Size, Queue->AsyncInfo);
+                                               DstPtr, Size, QueueImpl);
     if (Res)
       return {OL_ERRC_UNKNOWN, "The data exchange operation failed"};
   }
@@ -490,14 +495,19 @@ ol_impl_result_t olDestroyKernel_impl(ol_kernel_handle_t Kernel) {
 }
 
 ol_impl_result_t
-olLaunchKernel_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
-                    const void *ArgumentsData, size_t ArgumentsSize,
+olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
+                    ol_kernel_handle_t Kernel, const void *ArgumentsData,
+                    size_t ArgumentsSize,
                     const ol_kernel_launch_size_args_t *LaunchSizeArgs,
                     ol_event_handle_t *EventOut) {
-  auto *DeviceImpl = Queue->Device->Device;
-
-  AsyncInfoWrapperTy AsyncInfoWrapper(*DeviceImpl, Queue->AsyncInfo);
+  auto *DeviceImpl = Device->Device;
+  if (Queue && Device != Queue->Device) {
+    return {OL_ERRC_INVALID_DEVICE,
+            "Device specified does not match the device of the given queue"};
+  }
 
+  auto *QueueImpl = Queue ? Queue->AsyncInfo : nullptr;
+  AsyncInfoWrapperTy AsyncInfoWrapper(*DeviceImpl, QueueImpl);
   KernelArgsTy LaunchArgs{};
   LaunchArgs.NumTeams[0] = LaunchSizeArgs->NumGroupsX;
   LaunchArgs.NumTeams[1] = LaunchSizeArgs->NumGroupsY;
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index 0da0d7da56cc3..4174fcf876471 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -19,12 +19,12 @@ add_libompt_unittest("offload.unittests"
     ${CMAKE_CURRENT_SOURCE_DIR}/queue/olDestroyQueue.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemAlloc.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemFree.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olMemcpy.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olLaunchKernel.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemcpy.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/program/olCreateProgram.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/program/olDestroyProgram.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olCreateKernel.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olDestroyKernel.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olLaunchKernel.cpp
     )
 add_dependencies("offload.unittests" ${PLUGINS_TEST_COMMON} LibomptUnitTestsDeviceBins)
 target_compile_definitions("offload.unittests" PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
diff --git a/offload/unittests/OffloadAPI/enqueue/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
similarity index 67%
rename from offload/unittests/OffloadAPI/enqueue/olLaunchKernel.cpp
rename to offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
index be6b08e1f5a8b..87a0609667e70 100644
--- a/offload/unittests/OffloadAPI/enqueue/olLaunchKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
@@ -18,6 +18,14 @@ struct olLaunchKernelTest : OffloadQueueTest {
     ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
                                    DeviceBin->getBufferSize(), &Program));
     ASSERT_SUCCESS(olCreateKernel(Program, "foo", &Kernel));
+    LaunchArgs.Dimensions = 1;
+    LaunchArgs.GroupSizeX = 64;
+    LaunchArgs.GroupSizeY = 1;
+    LaunchArgs.GroupSizeZ = 1;
+
+    LaunchArgs.NumGroupsX = 1;
+    LaunchArgs.NumGroupsY = 1;
+    LaunchArgs.NumGroupsZ = 1;
   }
 
   void TearDown() override {
@@ -33,27 +41,18 @@ struct olLaunchKernelTest : OffloadQueueTest {
   std::unique_ptr<llvm::MemoryBuffer> DeviceBin;
   ol_program_handle_t Program = nullptr;
   ol_kernel_handle_t Kernel = nullptr;
+  ol_kernel_launch_size_args_t LaunchArgs{};
 };
 
 TEST_F(olLaunchKernelTest, Success) {
   void *Mem;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 64, &Mem));
-  ol_kernel_launch_size_args_t LaunchArgs{};
-  LaunchArgs.Dimensions = 1;
-  LaunchArgs.GroupSizeX = 64;
-  LaunchArgs.GroupSizeY = 1;
-  LaunchArgs.GroupSizeZ = 1;
-
-  LaunchArgs.NumGroupsX = 1;
-  LaunchArgs.NumGroupsY = 1;
-  LaunchArgs.NumGroupsZ = 1;
-
   struct {
     void *Mem;
   } Args{Mem};
 
-  ASSERT_SUCCESS(
-      olLaunchKernel(Queue, Kernel, &Args, sizeof(Args), &LaunchArgs, nullptr));
+  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
+                                &LaunchArgs, nullptr));
 
   ASSERT_SUCCESS(olWaitQueue(Queue));
 
@@ -64,3 +63,22 @@ TEST_F(olLaunchKernelTest, Success) {
 
   ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_SHARED, Mem));
 }
+
+TEST_F(olLaunchKernelTest, SuccessSynchronous) {
+  void *Mem;
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 64, &Mem));
+
+  struct {
+    void *Mem;
+  } Args{Mem};
+
+  ASSERT_SUCCESS(olLaunchKernel(nullptr, Device, Kernel, &Args, sizeof(Args),
+                                &LaunchArgs, nullptr));
+
+  int *Data = (int *)Mem;
+  for (int i = 0; i < 64; i++) {
+    ASSERT_EQ(Data[i], i);
+  }
+
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_SHARED, Mem));
+}
diff --git a/offload/unittests/OffloadAPI/enqueue/olMemcpy.cpp b/offload/unittests/OffloadAPI/memory/olMemcpy.cpp
similarity index 63%
rename from offload/unittests/OffloadAPI/enqueue/olMemcpy.cpp
rename to offload/unittests/OffloadAPI/memory/olMemcpy.cpp
index c28d5e0a35137..99b6d230a95b3 100644
--- a/offload/unittests/OffloadAPI/enqueue/olMemcpy.cpp
+++ b/offload/unittests/OffloadAPI/memory/olMemcpy.cpp
@@ -70,4 +70,49 @@ TEST_F(olMemcpyTest, SuccessDtoD) {
   ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, AllocB));
 }
 
-TEST_F(olMemcpyTest, SuccessSizeZero) {}
\ No newline at end of file
+TEST_F(olMemcpyTest, SuccessHtoHSync) {
+  constexpr size_t Size = 1024;
+  std::vector<uint8_t> Input(Size, 42);
+  std::vector<uint8_t> Output(Size, 0);
+  ol_device_handle_t Host;
+  ASSERT_SUCCESS(olGetHostDevice(&Host));
+
+  ASSERT_SUCCESS(olMemcpy(nullptr, Output.data(), Host, Input.data(), Host,
+                          Size, nullptr));
+
+  for (uint8_t Val : Output) {
+    ASSERT_EQ(Val, 42);
+  }
+}
+
+TEST_F(olMemcpyTest, SuccessDtoHSync) {
+  constexpr size_t Size = 1024;
+  void *Alloc;
+  std::vector<uint8_t> Input(Size, 42);
+  std::vector<uint8_t> Output(Size, 0);
+  ol_device_handle_t Host;
+  ASSERT_SUCCESS(olGetHostDevice(&Host));
+
+  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
+  ASSERT_SUCCESS(
+      olMemcpy(nullptr, Alloc, Device, Input.data(), Host, Size, nullptr));
+  ASSERT_SUCCESS(
+      olMemcpy(nullptr, Output.data(), Host, Alloc, Device, Size, nullptr));
+  for (uint8_t Val : Output) {
+    ASSERT_EQ(Val, 42);
+  }
+  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
+
+TEST_F(olMemcpyTest, SuccessSizeZero) {
+  constexpr size_t Size = 1024;
+  std::vector<uint8_t> Input(Size, 42);
+  std::vector<uint8_t> Output(Size, 0);
+  ol_device_handle_t Host;
+  ASSERT_SUCCESS(olGetHostDevice(&Host));
+
+  // As with std::memcpy, size 0 is allowed. Keep all other arguments valid even
+  // if they aren't used.
+  ASSERT_SUCCESS(
+      olMemcpy(nullptr, Output.data(), Host, Input.data(), Host, 0, nullptr));
+}

>From e8e742319797a7cc8544b504fa0e0d1e1a38b1c2 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Mon, 31 Mar 2025 17:08:04 +0100
Subject: [PATCH 5/9] Drop device, type parameters from olMemFree

---
 offload/liboffload/API/Memory.td              |  2 --
 .../liboffload/include/generated/OffloadAPI.h | 10 +-------
 .../include/generated/OffloadEntryPoints.inc  | 22 ++++++-----------
 .../generated/OffloadImplFuncDecls.inc        |  3 +--
 .../include/generated/OffloadPrint.hpp        |  6 -----
 offload/liboffload/src/OffloadImpl.cpp        | 24 +++++++++++++++++--
 .../OffloadAPI/kernel/olLaunchKernel.cpp      |  4 ++--
 .../OffloadAPI/memory/olMemAlloc.cpp          |  6 ++---
 .../unittests/OffloadAPI/memory/olMemFree.cpp | 19 ++++-----------
 .../unittests/OffloadAPI/memory/olMemcpy.cpp  | 10 ++++----
 10 files changed, 46 insertions(+), 60 deletions(-)

diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td
index ca0200864d6f0..4a5c770bc1542 100644
--- a/offload/liboffload/API/Memory.td
+++ b/offload/liboffload/API/Memory.td
@@ -40,8 +40,6 @@ def : Function {
   let name = "olMemFree";
   let desc = "Frees a memory allocation previously made by olMemAlloc.";
   let params = [
-    Param<"ol_device_handle_t", "Device", "handle of the device to allocate on", PARAM_IN>,
-    Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>,
     Param<"void*", "Address", "address of the allocation to free", PARAM_IN>,
   ];
   let returns = [];
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 236ec438d524b..42d0edfe59976 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -542,14 +542,9 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemAlloc(
 ///     - ::OL_ERRC_UNINITIALIZED
 ///     - ::OL_ERRC_DEVICE_LOST
 ///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == Device`
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
 ///         + `NULL == Address`
 OL_APIEXPORT ol_result_t OL_APICALL olMemFree(
-    // [in] handle of the device to allocate on
-    ol_device_handle_t Device,
-    // [in] type of the allocation
-    ol_alloc_type_t Type,
     // [in] address of the allocation to free
     void *Address);
 
@@ -891,8 +886,6 @@ typedef struct ol_mem_alloc_params_t {
 /// @brief Function parameters for olMemFree
 /// @details Each entry is a pointer to the parameter passed to the function;
 typedef struct ol_mem_free_params_t {
-  ol_device_handle_t *pDevice;
-  ol_alloc_type_t *pType;
   void **pAddress;
 } ol_mem_free_params_t;
 
@@ -1084,8 +1077,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemAllocWithCodeLoc(
 /// @brief Variant of olMemFree that also sets source code location information
 /// @details See also ::olMemFree
 OL_APIEXPORT ol_result_t OL_APICALL
-olMemFreeWithCodeLoc(ol_device_handle_t Device, ol_alloc_type_t Type,
-                     void *Address, ol_code_location_t *CodeLocation);
+olMemFreeWithCodeLoc(void *Address, ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Variant of olCreateQueue that also sets source code location
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 92dc3171b21ba..f8fa4f52ae638 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -531,31 +531,24 @@ ol_result_t olMemAllocWithCodeLoc(ol_device_handle_t Device,
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olMemFree_val(ol_device_handle_t Device, ol_alloc_type_t Type,
-                               void *Address) {
+ol_impl_result_t olMemFree_val(void *Address) {
   if (offloadConfig().ValidationEnabled) {
-    if (NULL == Device) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
-    }
-
     if (NULL == Address) {
       return OL_ERRC_INVALID_NULL_POINTER;
     }
   }
 
-  return llvm::offload::olMemFree_impl(Device, Type, Address);
+  return llvm::offload::olMemFree_impl(Address);
 }
-OL_APIEXPORT ol_result_t OL_APICALL olMemFree(ol_device_handle_t Device,
-                                              ol_alloc_type_t Type,
-                                              void *Address) {
+OL_APIEXPORT ol_result_t OL_APICALL olMemFree(void *Address) {
   if (offloadConfig().TracingEnabled) {
     llvm::errs() << "---> olMemFree";
   }
 
-  ol_result_t Result = olMemFree_val(Device, Type, Address);
+  ol_result_t Result = olMemFree_val(Address);
 
   if (offloadConfig().TracingEnabled) {
-    ol_mem_free_params_t Params = {&Device, &Type, &Address};
+    ol_mem_free_params_t Params = {&Address};
     llvm::errs() << "(" << &Params << ")";
     llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
@@ -564,11 +557,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemFree(ol_device_handle_t Device,
   }
   return Result;
 }
-ol_result_t olMemFreeWithCodeLoc(ol_device_handle_t Device,
-                                 ol_alloc_type_t Type, void *Address,
+ol_result_t olMemFreeWithCodeLoc(void *Address,
                                  ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olMemFree(Device, Type, Address);
+  ol_result_t Result = ::olMemFree(Address);
 
   currentCodeLocation() = nullptr;
   return Result;
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 340eab9b0ec22..482112674edfa 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -43,8 +43,7 @@ ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
                                  ol_alloc_type_t Type, size_t Size,
                                  void **AllocationOut);
 
-ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
-                                void *Address);
+ol_impl_result_t olMemFree_impl(void *Address);
 
 ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
                                     ol_queue_handle_t *Queue);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index d6bf470c18e70..7d0aad2689982 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -512,12 +512,6 @@ operator<<(llvm::raw_ostream &os, const struct ol_mem_alloc_params_t *params) {
 
 inline llvm::raw_ostream &
 operator<<(llvm::raw_ostream &os, const struct ol_mem_free_params_t *params) {
-  os << ".Device = ";
-  printPtr(os, *params->pDevice);
-  os << ", ";
-  os << ".Type = ";
-  os << *params->pType;
-  os << ", ";
   os << ".Address = ";
   printPtr(os, *params->pAddress);
   return os;
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 5d13d28b76bb7..b698e0dcbf058 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -90,6 +90,17 @@ struct ol_kernel_impl_t {
 namespace llvm {
 namespace offload {
 
+struct AllocInfo {
+  ol_device_handle_t Device;
+  ol_alloc_type_t Type;
+};
+
+using AllocInfoMapT = DenseMap<void *, AllocInfo>;
+AllocInfoMapT &allocInfoMap() {
+  static AllocInfoMapT AllocInfoMap{};
+  return AllocInfoMap;
+}
+
 using PlatformVecT = SmallVector<ol_platform_impl_t, 4>;
 PlatformVecT &Platforms() {
   static PlatformVecT Platforms;
@@ -325,16 +336,25 @@ ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
             formatv("Could not create allocation on device {0}", Device).str()};
 
   *AllocationOut = *Alloc;
+  allocInfoMap().insert_or_assign(*Alloc, AllocInfo{Device, Type});
   return OL_SUCCESS;
 }
 
-ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
-                                void *Address) {
+ol_impl_result_t olMemFree_impl(void *Address) {
+  if (!allocInfoMap().contains(Address))
+    return {OL_ERRC_INVALID_ARGUMENT, "Address is not a known allocation"};
+
+  auto AllocInfo = allocInfoMap().at(Address);
+  auto Device = AllocInfo.Device;
+  auto Type = AllocInfo.Type;
+
   auto Res =
       Device->Device->dataDelete(Address, convertOlToPluginAllocTy(Type));
   if (Res)
     return {OL_ERRC_OUT_OF_RESOURCES, "Could not free allocation"};
 
+  allocInfoMap().erase(Address);
+
   return OL_SUCCESS;
 }
 
diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
index 87a0609667e70..3c89e40ea49cb 100644
--- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
@@ -61,7 +61,7 @@ TEST_F(olLaunchKernelTest, Success) {
     ASSERT_EQ(Data[i], i);
   }
 
-  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_SHARED, Mem));
+  ASSERT_SUCCESS(olMemFree(Mem));
 }
 
 TEST_F(olLaunchKernelTest, SuccessSynchronous) {
@@ -80,5 +80,5 @@ TEST_F(olLaunchKernelTest, SuccessSynchronous) {
     ASSERT_EQ(Data[i], i);
   }
 
-  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_SHARED, Mem));
+  ASSERT_SUCCESS(olMemFree(Mem));
 }
diff --git a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
index 8912d8fcc1bf8..d9bd382217089 100644
--- a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
+++ b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
@@ -16,21 +16,21 @@ TEST_F(olMemAllocTest, SuccessAllocShared) {
   void *Alloc = nullptr;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 1024, &Alloc));
   ASSERT_NE(Alloc, nullptr);
-  olMemFree(Device, OL_ALLOC_TYPE_SHARED, Alloc);
+  olMemFree(Alloc);
 }
 
 TEST_F(olMemAllocTest, SuccessAllocHost) {
   void *Alloc = nullptr;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, &Alloc));
   ASSERT_NE(Alloc, nullptr);
-  olMemFree(Device, OL_ALLOC_TYPE_HOST, Alloc);
+  olMemFree(Alloc);
 }
 
 TEST_F(olMemAllocTest, SuccessAllocDevice) {
   void *Alloc = nullptr;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
   ASSERT_NE(Alloc, nullptr);
-  olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc);
+  olMemFree(Alloc);
 }
 
 TEST_F(olMemAllocTest, InvalidNullDevice) {
diff --git a/offload/unittests/OffloadAPI/memory/olMemFree.cpp b/offload/unittests/OffloadAPI/memory/olMemFree.cpp
index 45e1b2a61eace..ae57d38b373cc 100644
--- a/offload/unittests/OffloadAPI/memory/olMemFree.cpp
+++ b/offload/unittests/OffloadAPI/memory/olMemFree.cpp
@@ -15,33 +15,24 @@ using olMemFreeTest = OffloadDeviceTest;
 TEST_F(olMemFreeTest, SuccessFreeShared) {
   void *Alloc = nullptr;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 1024, &Alloc));
-  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_SHARED, Alloc));
+  ASSERT_SUCCESS(olMemFree(Alloc));
 }
 
 TEST_F(olMemFreeTest, SuccessFreeHost) {
   void *Alloc = nullptr;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, &Alloc));
-  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_HOST, Alloc));
+  ASSERT_SUCCESS(olMemFree(Alloc));
 }
 
 TEST_F(olMemFreeTest, SuccessFreeDevice) {
   void *Alloc = nullptr;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
-  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
-}
-
-TEST_F(olMemFreeTest, InvalidNullDevice) {
-  void *Alloc = nullptr;
-  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
-  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
-               olMemFree(nullptr, OL_ALLOC_TYPE_DEVICE, &Alloc));
-  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+  ASSERT_SUCCESS(olMemFree(Alloc));
 }
 
 TEST_F(olMemFreeTest, InvalidNullPtr) {
   void *Alloc = nullptr;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
-  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
-               olMemFree(nullptr, OL_ALLOC_TYPE_DEVICE, &Alloc));
-  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olMemFree(nullptr));
+  ASSERT_SUCCESS(olMemFree(Alloc));
 }
diff --git a/offload/unittests/OffloadAPI/memory/olMemcpy.cpp b/offload/unittests/OffloadAPI/memory/olMemcpy.cpp
index 99b6d230a95b3..b521acee7ef0a 100644
--- a/offload/unittests/OffloadAPI/memory/olMemcpy.cpp
+++ b/offload/unittests/OffloadAPI/memory/olMemcpy.cpp
@@ -22,7 +22,7 @@ TEST_F(olMemcpyTest, SuccessHtoD) {
   ASSERT_SUCCESS(
       olMemcpy(Queue, Alloc, Device, Input.data(), Host, Size, nullptr));
   olWaitQueue(Queue);
-  olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc);
+  olMemFree(Alloc);
 }
 
 TEST_F(olMemcpyTest, SuccessDtoH) {
@@ -42,7 +42,7 @@ TEST_F(olMemcpyTest, SuccessDtoH) {
   for (uint8_t Val : Output) {
     ASSERT_EQ(Val, 42);
   }
-  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+  ASSERT_SUCCESS(olMemFree(Alloc));
 }
 
 TEST_F(olMemcpyTest, SuccessDtoD) {
@@ -66,8 +66,8 @@ TEST_F(olMemcpyTest, SuccessDtoD) {
   for (uint8_t Val : Output) {
     ASSERT_EQ(Val, 42);
   }
-  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, AllocA));
-  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, AllocB));
+  ASSERT_SUCCESS(olMemFree(AllocA));
+  ASSERT_SUCCESS(olMemFree(AllocB));
 }
 
 TEST_F(olMemcpyTest, SuccessHtoHSync) {
@@ -101,7 +101,7 @@ TEST_F(olMemcpyTest, SuccessDtoHSync) {
   for (uint8_t Val : Output) {
     ASSERT_EQ(Val, 42);
   }
-  ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+  ASSERT_SUCCESS(olMemFree(Alloc));
 }
 
 TEST_F(olMemcpyTest, SuccessSizeZero) {

>From 731a7d84cb0afab6dabdc2cd60c191341d477969 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Tue, 1 Apr 2025 13:18:59 +0100
Subject: [PATCH 6/9] Make kernel handles a simple pointer, no create/destroy
 functions

---
 offload/liboffload/API/Common.td              |  3 +-
 offload/liboffload/API/Kernel.td              | 20 ++----
 .../liboffload/include/generated/OffloadAPI.h | 52 ++++-----------
 .../include/generated/OffloadEntryPoints.inc  | 66 +++++--------------
 .../include/generated/OffloadFuncs.inc        |  6 +-
 .../generated/OffloadImplFuncDecls.inc        |  8 +--
 .../include/generated/OffloadPrint.hpp        | 12 +---
 offload/liboffload/src/OffloadImpl.cpp        | 23 ++-----
 offload/unittests/OffloadAPI/CMakeLists.txt   |  3 +-
 .../unittests/OffloadAPI/common/Fixtures.hpp  |  5 +-
 .../OffloadAPI/kernel/olDestroyKernel.cpp     | 22 -------
 .../{olCreateKernel.cpp => olGetKernel.cpp}   | 17 +++--
 .../OffloadAPI/kernel/olLaunchKernel.cpp      |  5 +-
 13 files changed, 59 insertions(+), 183 deletions(-)
 delete mode 100644 offload/unittests/OffloadAPI/kernel/olDestroyKernel.cpp
 rename offload/unittests/OffloadAPI/kernel/{olCreateKernel.cpp => olGetKernel.cpp} (54%)

diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td
index a0a2697e27e77..de7502b540618 100644
--- a/offload/liboffload/API/Common.td
+++ b/offload/liboffload/API/Common.td
@@ -77,9 +77,10 @@ def : Handle {
   let desc = "Handle of program object";
 }
 
-def : Handle {
+def : Typedef {
   let name = "ol_kernel_handle_t";
   let desc = "Handle of kernel object";
+  let value = "void *";
 }
 
 def : Enum {
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index ef831f7c29801..c49ad0aafb52f 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -11,23 +11,15 @@
 //===----------------------------------------------------------------------===//
 
 def : Function {
-    let name = "olCreateKernel";
-    let desc = "Create a kernel from the function identified by `KernelName` in the given program.";
-    let details = [];
+    let name = "olGetKernel";
+    let desc = "Get a kernel from the function identified by `KernelName` in the given program.";
+    let details = [
+        "The kernel handle returned is owned by the device so does not need to be destroyed."
+    ];
     let params = [
         Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>,
         Param<"const char*", "KernelName", "name of the kernel entry point in the program", PARAM_IN>,
-        Param<"ol_kernel_handle_t*", "Kernel", "output pointer for the created kernel", PARAM_OUT>
-    ];
-    let returns = [];
-}
-
-def : Function {
-    let name = "olDestroyKernel";
-    let desc = "Destroy the kernel and free all underlying resources.";
-    let details = [];
-    let params = [
-        Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
+        Param<"ol_kernel_handle_t*", "Kernel", "output pointer for the fetched kernel", PARAM_OUT>
     ];
     let returns = [];
 }
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 42d0edfe59976..dffdffc04b6d4 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -99,7 +99,7 @@ typedef struct ol_program_impl_t *ol_program_handle_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Handle of kernel object
-typedef struct ol_kernel_impl_t *ol_kernel_handle_t;
+typedef void *ol_kernel_handle_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Defines Return/Error codes
@@ -757,10 +757,12 @@ OL_APIEXPORT ol_result_t OL_APICALL olDestroyProgram(
     ol_program_handle_t Program);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Create a kernel from the function identified by `KernelName` in the
+/// @brief Get a kernel from the function identified by `KernelName` in the
 /// given program.
 ///
 /// @details
+///    - The kernel handle returned is owned by the device so does not need to
+///    be destroyed.
 ///
 /// @returns
 ///     - ::OL_RESULT_SUCCESS
@@ -771,30 +773,14 @@ OL_APIEXPORT ol_result_t OL_APICALL olDestroyProgram(
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
 ///         + `NULL == KernelName`
 ///         + `NULL == Kernel`
-OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(
+OL_APIEXPORT ol_result_t OL_APICALL olGetKernel(
     // [in] handle of the program
     ol_program_handle_t Program,
     // [in] name of the kernel entry point in the program
     const char *KernelName,
-    // [out] output pointer for the created kernel
+    // [out] output pointer for the fetched kernel
     ol_kernel_handle_t *Kernel);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Destroy the kernel and free all underlying resources.
-///
-/// @details
-///
-/// @returns
-///     - ::OL_RESULT_SUCCESS
-///     - ::OL_ERRC_UNINITIALIZED
-///     - ::OL_ERRC_DEVICE_LOST
-///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == Kernel`
-///     - ::OL_ERRC_INVALID_NULL_POINTER
-OL_APIEXPORT ol_result_t OL_APICALL olDestroyKernel(
-    // [in] handle of the kernel
-    ol_kernel_handle_t Kernel);
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olGetPlatform
 /// @details Each entry is a pointer to the parameter passed to the function;
@@ -969,20 +955,13 @@ typedef struct ol_destroy_program_params_t {
 } ol_destroy_program_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olCreateKernel
+/// @brief Function parameters for olGetKernel
 /// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_create_kernel_params_t {
+typedef struct ol_get_kernel_params_t {
   ol_program_handle_t *pProgram;
   const char **pKernelName;
   ol_kernel_handle_t **pKernel;
-} ol_create_kernel_params_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olDestroyKernel
-/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_destroy_kernel_params_t {
-  ol_kernel_handle_t *pKernel;
-} ol_destroy_kernel_params_t;
+} ol_get_kernel_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Variant of olInit that also sets source code location information
@@ -1149,20 +1128,13 @@ OL_APIEXPORT ol_result_t OL_APICALL olDestroyProgramWithCodeLoc(
     ol_program_handle_t Program, ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olCreateKernel that also sets source code location
+/// @brief Variant of olGetKernel that also sets source code location
 /// information
-/// @details See also ::olCreateKernel
-OL_APIEXPORT ol_result_t OL_APICALL olCreateKernelWithCodeLoc(
+/// @details See also ::olGetKernel
+OL_APIEXPORT ol_result_t OL_APICALL olGetKernelWithCodeLoc(
     ol_program_handle_t Program, const char *KernelName,
     ol_kernel_handle_t *Kernel, ol_code_location_t *CodeLocation);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olDestroyKernel that also sets source code location
-/// information
-/// @details See also ::olDestroyKernel
-OL_APIEXPORT ol_result_t OL_APICALL olDestroyKernelWithCodeLoc(
-    ol_kernel_handle_t Kernel, ol_code_location_t *CodeLocation);
-
 #if defined(__cplusplus)
 } // extern "C"
 #endif
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index f8fa4f52ae638..a5a7695bc22c6 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -983,9 +983,9 @@ ol_result_t olDestroyProgramWithCodeLoc(ol_program_handle_t Program,
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olCreateKernel_val(ol_program_handle_t Program,
-                                    const char *KernelName,
-                                    ol_kernel_handle_t *Kernel) {
+ol_impl_result_t olGetKernel_val(ol_program_handle_t Program,
+                                 const char *KernelName,
+                                 ol_kernel_handle_t *Kernel) {
   if (offloadConfig().ValidationEnabled) {
     if (NULL == Program) {
       return OL_ERRC_INVALID_NULL_HANDLE;
@@ -1000,19 +1000,19 @@ ol_impl_result_t olCreateKernel_val(ol_program_handle_t Program,
     }
   }
 
-  return llvm::offload::olCreateKernel_impl(Program, KernelName, Kernel);
+  return llvm::offload::olGetKernel_impl(Program, KernelName, Kernel);
 }
-OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(ol_program_handle_t Program,
-                                                   const char *KernelName,
-                                                   ol_kernel_handle_t *Kernel) {
+OL_APIEXPORT ol_result_t OL_APICALL olGetKernel(ol_program_handle_t Program,
+                                                const char *KernelName,
+                                                ol_kernel_handle_t *Kernel) {
   if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olCreateKernel";
+    llvm::errs() << "---> olGetKernel";
   }
 
-  ol_result_t Result = olCreateKernel_val(Program, KernelName, Kernel);
+  ol_result_t Result = olGetKernel_val(Program, KernelName, Kernel);
 
   if (offloadConfig().TracingEnabled) {
-    ol_create_kernel_params_t Params = {&Program, &KernelName, &Kernel};
+    ol_get_kernel_params_t Params = {&Program, &KernelName, &Kernel};
     llvm::errs() << "(" << &Params << ")";
     llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
@@ -1021,48 +1021,12 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(ol_program_handle_t Program,
   }
   return Result;
 }
-ol_result_t olCreateKernelWithCodeLoc(ol_program_handle_t Program,
-                                      const char *KernelName,
-                                      ol_kernel_handle_t *Kernel,
-                                      ol_code_location_t *CodeLocation) {
-  currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olCreateKernel(Program, KernelName, Kernel);
-
-  currentCodeLocation() = nullptr;
-  return Result;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olDestroyKernel_val(ol_kernel_handle_t Kernel) {
-  if (offloadConfig().ValidationEnabled) {
-    if (NULL == Kernel) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
-    }
-  }
-
-  return llvm::offload::olDestroyKernel_impl(Kernel);
-}
-OL_APIEXPORT ol_result_t OL_APICALL olDestroyKernel(ol_kernel_handle_t Kernel) {
-  if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olDestroyKernel";
-  }
-
-  ol_result_t Result = olDestroyKernel_val(Kernel);
-
-  if (offloadConfig().TracingEnabled) {
-    ol_destroy_kernel_params_t Params = {&Kernel};
-    llvm::errs() << "(" << &Params << ")";
-    llvm::errs() << "-> " << Result << "\n";
-    if (Result && Result->Details) {
-      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
-    }
-  }
-  return Result;
-}
-ol_result_t olDestroyKernelWithCodeLoc(ol_kernel_handle_t Kernel,
-                                       ol_code_location_t *CodeLocation) {
+ol_result_t olGetKernelWithCodeLoc(ol_program_handle_t Program,
+                                   const char *KernelName,
+                                   ol_kernel_handle_t *Kernel,
+                                   ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olDestroyKernel(Kernel);
+  ol_result_t Result = ::olGetKernel(Program, KernelName, Kernel);
 
   currentCodeLocation() = nullptr;
   return Result;
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index d8fc11a4267fe..f146b252a68ab 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -32,8 +32,7 @@ OFFLOAD_FUNC(olMemcpy)
 OFFLOAD_FUNC(olLaunchKernel)
 OFFLOAD_FUNC(olCreateProgram)
 OFFLOAD_FUNC(olDestroyProgram)
-OFFLOAD_FUNC(olCreateKernel)
-OFFLOAD_FUNC(olDestroyKernel)
+OFFLOAD_FUNC(olGetKernel)
 OFFLOAD_FUNC(olInitWithCodeLoc)
 OFFLOAD_FUNC(olShutDownWithCodeLoc)
 OFFLOAD_FUNC(olGetPlatformWithCodeLoc)
@@ -56,7 +55,6 @@ OFFLOAD_FUNC(olMemcpyWithCodeLoc)
 OFFLOAD_FUNC(olLaunchKernelWithCodeLoc)
 OFFLOAD_FUNC(olCreateProgramWithCodeLoc)
 OFFLOAD_FUNC(olDestroyProgramWithCodeLoc)
-OFFLOAD_FUNC(olCreateKernelWithCodeLoc)
-OFFLOAD_FUNC(olDestroyKernelWithCodeLoc)
+OFFLOAD_FUNC(olGetKernelWithCodeLoc)
 
 #undef OFFLOAD_FUNC
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 482112674edfa..5d6d42f3a5f5b 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -74,8 +74,6 @@ ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device,
 
 ol_impl_result_t olDestroyProgram_impl(ol_program_handle_t Program);
 
-ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
-                                     const char *KernelName,
-                                     ol_kernel_handle_t *Kernel);
-
-ol_impl_result_t olDestroyKernel_impl(ol_kernel_handle_t Kernel);
+ol_impl_result_t olGetKernel_impl(ol_program_handle_t Program,
+                                  const char *KernelName,
+                                  ol_kernel_handle_t *Kernel);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 7d0aad2689982..04c8e09b75836 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -25,7 +25,6 @@ template <> struct is_handle<ol_context_handle_t> : std::true_type {};
 template <> struct is_handle<ol_queue_handle_t> : std::true_type {};
 template <> struct is_handle<ol_event_handle_t> : std::true_type {};
 template <> struct is_handle<ol_program_handle_t> : std::true_type {};
-template <> struct is_handle<ol_kernel_handle_t> : std::true_type {};
 template <typename T> inline constexpr bool is_handle_v = is_handle<T>::value;
 
 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
@@ -635,8 +634,7 @@ operator<<(llvm::raw_ostream &os,
 }
 
 inline llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os,
-           const struct ol_create_kernel_params_t *params) {
+operator<<(llvm::raw_ostream &os, const struct ol_get_kernel_params_t *params) {
   os << ".Program = ";
   printPtr(os, *params->pProgram);
   os << ", ";
@@ -648,14 +646,6 @@ operator<<(llvm::raw_ostream &os,
   return os;
 }
 
-inline llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os,
-           const struct ol_destroy_kernel_params_t *params) {
-  os << ".Kernel = ";
-  printPtr(os, *params->pKernel);
-  return os;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 // @brief Print pointer value
 template <typename T>
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index b698e0dcbf058..a67b8bce0f968 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -83,10 +83,6 @@ struct ol_program_impl_t {
   __tgt_device_image DeviceImage;
 };
 
-struct ol_kernel_impl_t {
-  GenericKernelTy *KernelImpl;
-};
-
 namespace llvm {
 namespace offload {
 
@@ -490,9 +486,9 @@ ol_impl_result_t olDestroyProgram_impl(ol_program_handle_t Program) {
   return olDestroy(Program);
 }
 
-ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
-                                     const char *KernelName,
-                                     ol_kernel_handle_t *Kernel) {
+ol_impl_result_t olGetKernel_impl(ol_program_handle_t Program,
+                                  const char *KernelName,
+                                  ol_kernel_handle_t *Kernel) {
 
   auto &Device = Program->Image->getDevice();
   auto KernelImpl = Device.constructKernel(KernelName);
@@ -503,17 +499,11 @@ ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
   if (Err)
     return {OL_ERRC_UNKNOWN, "Could not initialize the kernel"};
 
-  ol_kernel_handle_t CreatedKernel = new ol_kernel_impl_t();
-  CreatedKernel->KernelImpl = &*KernelImpl;
-  *Kernel = CreatedKernel;
+  *Kernel = &*KernelImpl;
 
   return OL_SUCCESS;
 }
 
-ol_impl_result_t olDestroyKernel_impl(ol_kernel_handle_t Kernel) {
-  return olDestroy(Kernel);
-}
-
 ol_impl_result_t
 olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
                     ol_kernel_handle_t Kernel, const void *ArgumentsData,
@@ -543,8 +533,9 @@ olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
   // Don't do anything with pointer indirection; use arg data as-is
   LaunchArgs.Flags.IsCUDA = true;
 
-  auto Err = Kernel->KernelImpl->launch(*DeviceImpl, LaunchArgs.ArgPtrs,
-                                        nullptr, LaunchArgs, AsyncInfoWrapper);
+  auto *KernelImpl = reinterpret_cast<GenericKernelTy *>(Kernel);
+  auto Err = KernelImpl->launch(*DeviceImpl, LaunchArgs.ArgPtrs, nullptr,
+                                LaunchArgs, AsyncInfoWrapper);
 
   AsyncInfoWrapper.finalize(Err);
   if (Err)
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index 4174fcf876471..4ffd07762a4bd 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -22,8 +22,7 @@ add_libompt_unittest("offload.unittests"
     ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemcpy.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/program/olCreateProgram.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/program/olDestroyProgram.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olCreateKernel.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olDestroyKernel.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olGetKernel.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/kernel/olLaunchKernel.cpp
     )
 add_dependencies("offload.unittests" ${PLUGINS_TEST_COMMON} LibomptUnitTestsDeviceBins)
diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp
index ea26b85803272..f712e8b2f6c8b 100644
--- a/offload/unittests/OffloadAPI/common/Fixtures.hpp
+++ b/offload/unittests/OffloadAPI/common/Fixtures.hpp
@@ -96,13 +96,10 @@ struct OffloadProgramTest : OffloadDeviceTest {
 struct OffloadKernelTest : OffloadProgramTest {
   void SetUp() override {
     RETURN_ON_FATAL_FAILURE(OffloadProgramTest::SetUp());
-    ASSERT_SUCCESS(olCreateKernel(Program, "foo", &Kernel));
+    ASSERT_SUCCESS(olGetKernel(Program, "foo", &Kernel));
   }
 
   void TearDown() override {
-    if (Kernel) {
-      olDestroyKernel(Kernel);
-    }
     RETURN_ON_FATAL_FAILURE(OffloadProgramTest::TearDown());
   }
 
diff --git a/offload/unittests/OffloadAPI/kernel/olDestroyKernel.cpp b/offload/unittests/OffloadAPI/kernel/olDestroyKernel.cpp
deleted file mode 100644
index 7bb26180207fa..0000000000000
--- a/offload/unittests/OffloadAPI/kernel/olDestroyKernel.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===------- Offload API tests - olReleaseKernel --------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "../common/Fixtures.hpp"
-#include <OffloadAPI.h>
-#include <gtest/gtest.h>
-
-using olDestroyKernelTest = OffloadKernelTest;
-
-TEST_F(olDestroyKernelTest, Success) {
-  ASSERT_SUCCESS(olDestroyKernel(Kernel));
-  Kernel = nullptr;
-}
-
-TEST_F(olDestroyKernelTest, InvalidNullHandle) {
-  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olDestroyKernel(nullptr));
-}
diff --git a/offload/unittests/OffloadAPI/kernel/olCreateKernel.cpp b/offload/unittests/OffloadAPI/kernel/olGetKernel.cpp
similarity index 54%
rename from offload/unittests/OffloadAPI/kernel/olCreateKernel.cpp
rename to offload/unittests/OffloadAPI/kernel/olGetKernel.cpp
index 5a6405cb22be1..f320d191ad58f 100644
--- a/offload/unittests/OffloadAPI/kernel/olCreateKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olGetKernel.cpp
@@ -1,4 +1,4 @@
-//===------- Offload API tests - olCreateKernel ---------------------------===//
+//===------- Offload API tests - olGetKernel ------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -10,22 +10,21 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-using olCreateKernelTest = OffloadProgramTest;
+using olGetKernelTest = OffloadProgramTest;
 
-TEST_F(olCreateKernelTest, Success) {
+TEST_F(olGetKernelTest, Success) {
   ol_kernel_handle_t Kernel = nullptr;
-  ASSERT_SUCCESS(olCreateKernel(Program, "foo", &Kernel));
+  ASSERT_SUCCESS(olGetKernel(Program, "foo", &Kernel));
   ASSERT_NE(Kernel, nullptr);
-  ASSERT_SUCCESS(olDestroyKernel(Kernel));
 }
 
-TEST_F(olCreateKernelTest, InvalidNullProgram) {
+TEST_F(olGetKernelTest, InvalidNullProgram) {
   ol_kernel_handle_t Kernel = nullptr;
   ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
-               olCreateKernel(nullptr, "foo", &Kernel));
+               olGetKernel(nullptr, "foo", &Kernel));
 }
 
-TEST_F(olCreateKernelTest, InvalidNullKernelPointer) {
+TEST_F(olGetKernelTest, InvalidNullKernelPointer) {
   ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
-               olCreateKernel(Program, "foo", nullptr));
+               olGetKernel(Program, "foo", nullptr));
 }
diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
index 3c89e40ea49cb..96df393619743 100644
--- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
@@ -17,7 +17,7 @@ struct olLaunchKernelTest : OffloadQueueTest {
     ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
     ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
                                    DeviceBin->getBufferSize(), &Program));
-    ASSERT_SUCCESS(olCreateKernel(Program, "foo", &Kernel));
+    ASSERT_SUCCESS(olGetKernel(Program, "foo", &Kernel));
     LaunchArgs.Dimensions = 1;
     LaunchArgs.GroupSizeX = 64;
     LaunchArgs.GroupSizeY = 1;
@@ -29,9 +29,6 @@ struct olLaunchKernelTest : OffloadQueueTest {
   }
 
   void TearDown() override {
-    if (Kernel) {
-      olDestroyKernel(Kernel);
-    }
     if (Program) {
       olDestroyProgram(Program);
     }

>From 4e78abf28614a4334d82e0b37d095de14b3fc0a6 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Tue, 1 Apr 2025 13:24:46 +0100
Subject: [PATCH 7/9] Tidy

---
 offload/liboffload/API/Enqueue.td             |  68 -----
 offload/liboffload/API/Kernel.td              |  35 +++
 offload/liboffload/API/Memory.td              |  22 ++
 offload/liboffload/API/OffloadAPI.td          |   1 -
 .../liboffload/include/generated/OffloadAPI.h | 256 ++++++++--------
 .../include/generated/OffloadEntryPoints.inc  | 274 +++++++++---------
 .../include/generated/OffloadFuncs.inc        |   8 +-
 .../generated/OffloadImplFuncDecls.inc        |  24 +-
 .../include/generated/OffloadPrint.hpp        | 102 +++----
 offload/liboffload/src/OffloadImpl.cpp        |   1 -
 10 files changed, 389 insertions(+), 402 deletions(-)
 delete mode 100644 offload/liboffload/API/Enqueue.td

diff --git a/offload/liboffload/API/Enqueue.td b/offload/liboffload/API/Enqueue.td
deleted file mode 100644
index 1936c2fffe021..0000000000000
--- a/offload/liboffload/API/Enqueue.td
+++ /dev/null
@@ -1,68 +0,0 @@
-//===-- Enqueue.td - Enqueue definitions for Offload -------*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains Offload API definitions related to enqueable operations
-//
-//===----------------------------------------------------------------------===//
-
-def : Function {
-    let name = "olMemcpy";
-    let desc = "Enqueue a memcpy operation.";
-    let details = [
-        "For host pointers, use the device returned by olGetHostDevice",
-        "If a queue is specified, at least one device must be a non-host device",
-        "If a queue is not specified, the memcpy happens synchronously"
-    ];
-    let params = [
-        Param<"ol_queue_handle_t", "Queue", "handle of the queue.", PARAM_IN_OPTIONAL>,
-        Param<"void*", "DstPtr", "pointer to copy to", PARAM_IN>,
-        Param<"ol_device_handle_t", "DstDevice", "device that DstPtr belongs to", PARAM_IN>,
-        Param<"void*", "SrcPtr", "pointer to copy from", PARAM_IN>,
-        Param<"ol_device_handle_t", "SrcDevice", "device that SrcPtr belongs to", PARAM_IN>,
-        Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
-        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
-    ];
-    let returns = [
-        Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>
-    ];
-}
-
-def : Struct {
-    let name = "ol_kernel_launch_size_args_t";
-    let desc = "Size-related arguments for a kernel launch.";
-    let members = [
-        StructMember<"size_t", "Dimensions", "Number of work dimensions">,
-        StructMember<"size_t", "NumGroupsX", "Number of work groups on the X dimension">,
-        StructMember<"size_t", "NumGroupsY", "Number of work groups on the Y dimension">,
-        StructMember<"size_t", "NumGroupsZ", "Number of work groups on the Z dimension">,
-        StructMember<"size_t", "GroupSizeX", "Size of a work group on the X dimension.">,
-        StructMember<"size_t", "GroupSizeY", "Size of a work group on the Y dimension.">,
-        StructMember<"size_t", "GroupSizeZ", "Size of a work group on the Z dimension.">
-    ];
-}
-
-def : Function {
-    let name = "olLaunchKernel";
-    let desc = "Enqueue a kernel launch with the specified size and parameters.";
-    let details = [
-        "If a queue is not specified, kernel execution happens synchronously"
-    ];
-    let params = [
-        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN_OPTIONAL>,
-        Param<"ol_device_handle_t", "Device", "handle of the device to execute on", PARAM_IN>,
-        Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
-        Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN>,
-        Param<"size_t", "ArgumentsSize", "size of the kernel argument struct", PARAM_IN>,
-        Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>,
-        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
-    ];
-    let returns = [
-        Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>,
-        Return<"OL_ERRC_INVALID_DEVICE", ["If Queue is non-null but does not belong to Device"]>,
-    ];
-}
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index c49ad0aafb52f..8f4349ed36f35 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -23,3 +23,38 @@ def : Function {
     ];
     let returns = [];
 }
+
+def : Struct {
+    let name = "ol_kernel_launch_size_args_t";
+    let desc = "Size-related arguments for a kernel launch.";
+    let members = [
+        StructMember<"size_t", "Dimensions", "Number of work dimensions">,
+        StructMember<"size_t", "NumGroupsX", "Number of work groups on the X dimension">,
+        StructMember<"size_t", "NumGroupsY", "Number of work groups on the Y dimension">,
+        StructMember<"size_t", "NumGroupsZ", "Number of work groups on the Z dimension">,
+        StructMember<"size_t", "GroupSizeX", "Size of a work group on the X dimension.">,
+        StructMember<"size_t", "GroupSizeY", "Size of a work group on the Y dimension.">,
+        StructMember<"size_t", "GroupSizeZ", "Size of a work group on the Z dimension.">
+    ];
+}
+
+def : Function {
+    let name = "olLaunchKernel";
+    let desc = "Enqueue a kernel launch with the specified size and parameters.";
+    let details = [
+        "If a queue is not specified, kernel execution happens synchronously"
+    ];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN_OPTIONAL>,
+        Param<"ol_device_handle_t", "Device", "handle of the device to execute on", PARAM_IN>,
+        Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
+        Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN>,
+        Param<"size_t", "ArgumentsSize", "size of the kernel argument struct", PARAM_IN>,
+        Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>,
+        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+    ];
+    let returns = [
+        Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>,
+        Return<"OL_ERRC_INVALID_DEVICE", ["If Queue is non-null but does not belong to Device"]>,
+    ];
+}
diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td
index 4a5c770bc1542..ed32fb0f6ee50 100644
--- a/offload/liboffload/API/Memory.td
+++ b/offload/liboffload/API/Memory.td
@@ -44,3 +44,25 @@ def : Function {
   ];
   let returns = [];
 }
+
+def : Function {
+    let name = "olMemcpy";
+    let desc = "Enqueue a memcpy operation.";
+    let details = [
+        "For host pointers, use the device returned by olGetHostDevice",
+        "If a queue is specified, at least one device must be a non-host device",
+        "If a queue is not specified, the memcpy happens synchronously"
+    ];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue.", PARAM_IN_OPTIONAL>,
+        Param<"void*", "DstPtr", "pointer to copy to", PARAM_IN>,
+        Param<"ol_device_handle_t", "DstDevice", "device that DstPtr belongs to", PARAM_IN>,
+        Param<"void*", "SrcPtr", "pointer to copy from", PARAM_IN>,
+        Param<"ol_device_handle_t", "SrcDevice", "device that SrcPtr belongs to", PARAM_IN>,
+        Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
+        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+    ];
+    let returns = [
+        Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>
+    ];
+}
diff --git a/offload/liboffload/API/OffloadAPI.td b/offload/liboffload/API/OffloadAPI.td
index f2822b93e6bf8..f9829155b6cea 100644
--- a/offload/liboffload/API/OffloadAPI.td
+++ b/offload/liboffload/API/OffloadAPI.td
@@ -16,6 +16,5 @@ include "Device.td"
 include "Memory.td"
 include "Queue.td"
 include "Event.td"
-include "Enqueue.td"
 include "Program.td"
 include "Kernel.td"
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index dffdffc04b6d4..5e8465e238924 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -548,6 +548,42 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemFree(
     // [in] address of the allocation to free
     void *Address);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a memcpy operation.
+///
+/// @details
+///    - For host pointers, use the device returned by olGetHostDevice
+///    - If a queue is specified, at least one device must be a non-host device
+///    - If a queue is not specified, the memcpy happens synchronously
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_ARGUMENT
+///         + `Queue == NULL && EventOut != NULL`
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == DstDevice`
+///         + `NULL == SrcDevice`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == DstPtr`
+///         + `NULL == SrcPtr`
+OL_APIEXPORT ol_result_t OL_APICALL olMemcpy(
+    // [in][optional] handle of the queue.
+    ol_queue_handle_t Queue,
+    // [in] pointer to copy to
+    void *DstPtr,
+    // [in] device that DstPtr belongs to
+    ol_device_handle_t DstDevice,
+    // [in] pointer to copy from
+    void *SrcPtr,
+    // [in] device that SrcPtr belongs to
+    ol_device_handle_t SrcDevice,
+    // [in] size in bytes of data to copy
+    size_t Size,
+    // [out][optional] optional recorded event for the enqueued operation
+    ol_event_handle_t *EventOut);
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Create a queue for the given device.
 ///
@@ -631,90 +667,6 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(
     // [in] handle of the event
     ol_event_handle_t Event);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Enqueue a memcpy operation.
-///
-/// @details
-///    - For host pointers, use the device returned by olGetHostDevice
-///    - If a queue is specified, at least one device must be a non-host device
-///    - If a queue is not specified, the memcpy happens synchronously
-///
-/// @returns
-///     - ::OL_RESULT_SUCCESS
-///     - ::OL_ERRC_UNINITIALIZED
-///     - ::OL_ERRC_DEVICE_LOST
-///     - ::OL_ERRC_INVALID_ARGUMENT
-///         + `Queue == NULL && EventOut != NULL`
-///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == DstDevice`
-///         + `NULL == SrcDevice`
-///     - ::OL_ERRC_INVALID_NULL_POINTER
-///         + `NULL == DstPtr`
-///         + `NULL == SrcPtr`
-OL_APIEXPORT ol_result_t OL_APICALL olMemcpy(
-    // [in][optional] handle of the queue.
-    ol_queue_handle_t Queue,
-    // [in] pointer to copy to
-    void *DstPtr,
-    // [in] device that DstPtr belongs to
-    ol_device_handle_t DstDevice,
-    // [in] pointer to copy from
-    void *SrcPtr,
-    // [in] device that SrcPtr belongs to
-    ol_device_handle_t SrcDevice,
-    // [in] size in bytes of data to copy
-    size_t Size,
-    // [out][optional] optional recorded event for the enqueued operation
-    ol_event_handle_t *EventOut);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Size-related arguments for a kernel launch.
-typedef struct ol_kernel_launch_size_args_t {
-  size_t Dimensions; /// Number of work dimensions
-  size_t NumGroupsX; /// Number of work groups on the X dimension
-  size_t NumGroupsY; /// Number of work groups on the Y dimension
-  size_t NumGroupsZ; /// Number of work groups on the Z dimension
-  size_t GroupSizeX; /// Size of a work group on the X dimension.
-  size_t GroupSizeY; /// Size of a work group on the Y dimension.
-  size_t GroupSizeZ; /// Size of a work group on the Z dimension.
-} ol_kernel_launch_size_args_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Enqueue a kernel launch with the specified size and parameters.
-///
-/// @details
-///    - If a queue is not specified, kernel execution happens synchronously
-///
-/// @returns
-///     - ::OL_RESULT_SUCCESS
-///     - ::OL_ERRC_UNINITIALIZED
-///     - ::OL_ERRC_DEVICE_LOST
-///     - ::OL_ERRC_INVALID_ARGUMENT
-///         + `Queue == NULL && EventOut != NULL`
-///     - ::OL_ERRC_INVALID_DEVICE
-///         + If Queue is non-null but does not belong to Device
-///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == Device`
-///         + `NULL == Kernel`
-///     - ::OL_ERRC_INVALID_NULL_POINTER
-///         + `NULL == ArgumentsData`
-///         + `NULL == LaunchSizeArgs`
-OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
-    // [in][optional] handle of the queue
-    ol_queue_handle_t Queue,
-    // [in] handle of the device to execute on
-    ol_device_handle_t Device,
-    // [in] handle of the kernel
-    ol_kernel_handle_t Kernel,
-    // [in] pointer to the kernel argument struct
-    const void *ArgumentsData,
-    // [in] size of the kernel argument struct
-    size_t ArgumentsSize,
-    // [in] pointer to the struct containing launch size parameters
-    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-    // [out][optional] optional recorded event for the enqueued operation
-    ol_event_handle_t *EventOut);
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Create a program for the device from the binary image pointed to by
 /// `ProgData`.
@@ -781,6 +733,54 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetKernel(
     // [out] output pointer for the fetched kernel
     ol_kernel_handle_t *Kernel);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Size-related arguments for a kernel launch.
+typedef struct ol_kernel_launch_size_args_t {
+  size_t Dimensions; /// Number of work dimensions
+  size_t NumGroupsX; /// Number of work groups on the X dimension
+  size_t NumGroupsY; /// Number of work groups on the Y dimension
+  size_t NumGroupsZ; /// Number of work groups on the Z dimension
+  size_t GroupSizeX; /// Size of a work group on the X dimension.
+  size_t GroupSizeY; /// Size of a work group on the Y dimension.
+  size_t GroupSizeZ; /// Size of a work group on the Z dimension.
+} ol_kernel_launch_size_args_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a kernel launch with the specified size and parameters.
+///
+/// @details
+///    - If a queue is not specified, kernel execution happens synchronously
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_ARGUMENT
+///         + `Queue == NULL && EventOut != NULL`
+///     - ::OL_ERRC_INVALID_DEVICE
+///         + If Queue is non-null but does not belong to Device
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Device`
+///         + `NULL == Kernel`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == ArgumentsData`
+///         + `NULL == LaunchSizeArgs`
+OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
+    // [in][optional] handle of the queue
+    ol_queue_handle_t Queue,
+    // [in] handle of the device to execute on
+    ol_device_handle_t Device,
+    // [in] handle of the kernel
+    ol_kernel_handle_t Kernel,
+    // [in] pointer to the kernel argument struct
+    const void *ArgumentsData,
+    // [in] size of the kernel argument struct
+    size_t ArgumentsSize,
+    // [in] pointer to the struct containing launch size parameters
+    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+    // [out][optional] optional recorded event for the enqueued operation
+    ol_event_handle_t *EventOut);
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olGetPlatform
 /// @details Each entry is a pointer to the parameter passed to the function;
@@ -875,6 +875,19 @@ typedef struct ol_mem_free_params_t {
   void **pAddress;
 } ol_mem_free_params_t;
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olMemcpy
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_memcpy_params_t {
+  ol_queue_handle_t *pQueue;
+  void **pDstPtr;
+  ol_device_handle_t *pDstDevice;
+  void **pSrcPtr;
+  ol_device_handle_t *pSrcDevice;
+  size_t *pSize;
+  ol_event_handle_t **pEventOut;
+} ol_memcpy_params_t;
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olCreateQueue
 /// @details Each entry is a pointer to the parameter passed to the function;
@@ -911,32 +924,6 @@ typedef struct ol_wait_event_params_t {
   ol_event_handle_t *pEvent;
 } ol_wait_event_params_t;
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olMemcpy
-/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_memcpy_params_t {
-  ol_queue_handle_t *pQueue;
-  void **pDstPtr;
-  ol_device_handle_t *pDstDevice;
-  void **pSrcPtr;
-  ol_device_handle_t *pSrcDevice;
-  size_t *pSize;
-  ol_event_handle_t **pEventOut;
-} ol_memcpy_params_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olLaunchKernel
-/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_launch_kernel_params_t {
-  ol_queue_handle_t *pQueue;
-  ol_device_handle_t *pDevice;
-  ol_kernel_handle_t *pKernel;
-  const void **pArgumentsData;
-  size_t *pArgumentsSize;
-  const ol_kernel_launch_size_args_t **pLaunchSizeArgs;
-  ol_event_handle_t **pEventOut;
-} ol_launch_kernel_params_t;
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olCreateProgram
 /// @details Each entry is a pointer to the parameter passed to the function;
@@ -963,6 +950,19 @@ typedef struct ol_get_kernel_params_t {
   ol_kernel_handle_t **pKernel;
 } ol_get_kernel_params_t;
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olLaunchKernel
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_launch_kernel_params_t {
+  ol_queue_handle_t *pQueue;
+  ol_device_handle_t *pDevice;
+  ol_kernel_handle_t *pKernel;
+  const void **pArgumentsData;
+  size_t *pArgumentsSize;
+  const ol_kernel_launch_size_args_t **pLaunchSizeArgs;
+  ol_event_handle_t **pEventOut;
+} ol_launch_kernel_params_t;
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Variant of olInit that also sets source code location information
 /// @details See also ::olInit
@@ -1058,6 +1058,14 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemAllocWithCodeLoc(
 OL_APIEXPORT ol_result_t OL_APICALL
 olMemFreeWithCodeLoc(void *Address, ol_code_location_t *CodeLocation);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olMemcpy that also sets source code location information
+/// @details See also ::olMemcpy
+OL_APIEXPORT ol_result_t OL_APICALL olMemcpyWithCodeLoc(
+    ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice,
+    void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size,
+    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Variant of olCreateQueue that also sets source code location
 /// information
@@ -1094,24 +1102,6 @@ OL_APIEXPORT ol_result_t OL_APICALL olDestroyEventWithCodeLoc(
 OL_APIEXPORT ol_result_t OL_APICALL olWaitEventWithCodeLoc(
     ol_event_handle_t Event, ol_code_location_t *CodeLocation);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olMemcpy that also sets source code location information
-/// @details See also ::olMemcpy
-OL_APIEXPORT ol_result_t OL_APICALL olMemcpyWithCodeLoc(
-    ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice,
-    void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size,
-    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olLaunchKernel that also sets source code location
-/// information
-/// @details See also ::olLaunchKernel
-OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernelWithCodeLoc(
-    ol_queue_handle_t Queue, ol_device_handle_t Device,
-    ol_kernel_handle_t Kernel, const void *ArgumentsData, size_t ArgumentsSize,
-    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Variant of olCreateProgram that also sets source code location
 /// information
@@ -1135,6 +1125,16 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetKernelWithCodeLoc(
     ol_program_handle_t Program, const char *KernelName,
     ol_kernel_handle_t *Kernel, ol_code_location_t *CodeLocation);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olLaunchKernel that also sets source code location
+/// information
+/// @details See also ::olLaunchKernel
+OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernelWithCodeLoc(
+    ol_queue_handle_t Queue, ol_device_handle_t Device,
+    ol_kernel_handle_t Kernel, const void *ArgumentsData, size_t ArgumentsSize,
+    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
+
 #if defined(__cplusplus)
 } // extern "C"
 #endif
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index a5a7695bc22c6..a02d734bc5773 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -566,6 +566,71 @@ ol_result_t olMemFreeWithCodeLoc(void *Address,
   return Result;
 }
 
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olMemcpy_val(ol_queue_handle_t Queue, void *DstPtr,
+                              ol_device_handle_t DstDevice, void *SrcPtr,
+                              ol_device_handle_t SrcDevice, size_t Size,
+                              ol_event_handle_t *EventOut) {
+  if (offloadConfig().ValidationEnabled) {
+    if (Queue == NULL && EventOut != NULL) {
+      return OL_ERRC_INVALID_ARGUMENT;
+    }
+
+    if (NULL == DstDevice) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == SrcDevice) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == DstPtr) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+
+    if (NULL == SrcPtr) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return llvm::offload::olMemcpy_impl(Queue, DstPtr, DstDevice, SrcPtr,
+                                      SrcDevice, Size, EventOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olMemcpy(ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice,
+         void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size,
+         ol_event_handle_t *EventOut) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olMemcpy";
+  }
+
+  ol_result_t Result =
+      olMemcpy_val(Queue, DstPtr, DstDevice, SrcPtr, SrcDevice, Size, EventOut);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_memcpy_params_t Params = {&Queue,     &DstPtr, &DstDevice, &SrcPtr,
+                                 &SrcDevice, &Size,   &EventOut};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olMemcpyWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
+                                ol_device_handle_t DstDevice, void *SrcPtr,
+                                ol_device_handle_t SrcDevice, size_t Size,
+                                ol_event_handle_t *EventOut,
+                                ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result =
+      ::olMemcpy(Queue, DstPtr, DstDevice, SrcPtr, SrcDevice, Size, EventOut);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 ol_impl_result_t olCreateQueue_val(ol_device_handle_t Device,
                                    ol_queue_handle_t *Queue) {
@@ -753,143 +818,6 @@ ol_result_t olWaitEventWithCodeLoc(ol_event_handle_t Event,
   return Result;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olMemcpy_val(ol_queue_handle_t Queue, void *DstPtr,
-                              ol_device_handle_t DstDevice, void *SrcPtr,
-                              ol_device_handle_t SrcDevice, size_t Size,
-                              ol_event_handle_t *EventOut) {
-  if (offloadConfig().ValidationEnabled) {
-    if (Queue == NULL && EventOut != NULL) {
-      return OL_ERRC_INVALID_ARGUMENT;
-    }
-
-    if (NULL == DstDevice) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
-    }
-
-    if (NULL == SrcDevice) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
-    }
-
-    if (NULL == DstPtr) {
-      return OL_ERRC_INVALID_NULL_POINTER;
-    }
-
-    if (NULL == SrcPtr) {
-      return OL_ERRC_INVALID_NULL_POINTER;
-    }
-  }
-
-  return llvm::offload::olMemcpy_impl(Queue, DstPtr, DstDevice, SrcPtr,
-                                      SrcDevice, Size, EventOut);
-}
-OL_APIEXPORT ol_result_t OL_APICALL
-olMemcpy(ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice,
-         void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size,
-         ol_event_handle_t *EventOut) {
-  if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olMemcpy";
-  }
-
-  ol_result_t Result =
-      olMemcpy_val(Queue, DstPtr, DstDevice, SrcPtr, SrcDevice, Size, EventOut);
-
-  if (offloadConfig().TracingEnabled) {
-    ol_memcpy_params_t Params = {&Queue,     &DstPtr, &DstDevice, &SrcPtr,
-                                 &SrcDevice, &Size,   &EventOut};
-    llvm::errs() << "(" << &Params << ")";
-    llvm::errs() << "-> " << Result << "\n";
-    if (Result && Result->Details) {
-      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
-    }
-  }
-  return Result;
-}
-ol_result_t olMemcpyWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
-                                ol_device_handle_t DstDevice, void *SrcPtr,
-                                ol_device_handle_t SrcDevice, size_t Size,
-                                ol_event_handle_t *EventOut,
-                                ol_code_location_t *CodeLocation) {
-  currentCodeLocation() = CodeLocation;
-  ol_result_t Result =
-      ::olMemcpy(Queue, DstPtr, DstDevice, SrcPtr, SrcDevice, Size, EventOut);
-
-  currentCodeLocation() = nullptr;
-  return Result;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t
-olLaunchKernel_val(ol_queue_handle_t Queue, ol_device_handle_t Device,
-                   ol_kernel_handle_t Kernel, const void *ArgumentsData,
-                   size_t ArgumentsSize,
-                   const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-                   ol_event_handle_t *EventOut) {
-  if (offloadConfig().ValidationEnabled) {
-    if (Queue == NULL && EventOut != NULL) {
-      return OL_ERRC_INVALID_ARGUMENT;
-    }
-
-    if (NULL == Device) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
-    }
-
-    if (NULL == Kernel) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
-    }
-
-    if (NULL == ArgumentsData) {
-      return OL_ERRC_INVALID_NULL_POINTER;
-    }
-
-    if (NULL == LaunchSizeArgs) {
-      return OL_ERRC_INVALID_NULL_POINTER;
-    }
-  }
-
-  return llvm::offload::olLaunchKernel_impl(Queue, Device, Kernel,
-                                            ArgumentsData, ArgumentsSize,
-                                            LaunchSizeArgs, EventOut);
-}
-OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
-    ol_queue_handle_t Queue, ol_device_handle_t Device,
-    ol_kernel_handle_t Kernel, const void *ArgumentsData, size_t ArgumentsSize,
-    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-    ol_event_handle_t *EventOut) {
-  if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olLaunchKernel";
-  }
-
-  ol_result_t Result =
-      olLaunchKernel_val(Queue, Device, Kernel, ArgumentsData, ArgumentsSize,
-                         LaunchSizeArgs, EventOut);
-
-  if (offloadConfig().TracingEnabled) {
-    ol_launch_kernel_params_t Params = {
-        &Queue,         &Device,         &Kernel,  &ArgumentsData,
-        &ArgumentsSize, &LaunchSizeArgs, &EventOut};
-    llvm::errs() << "(" << &Params << ")";
-    llvm::errs() << "-> " << Result << "\n";
-    if (Result && Result->Details) {
-      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
-    }
-  }
-  return Result;
-}
-ol_result_t olLaunchKernelWithCodeLoc(
-    ol_queue_handle_t Queue, ol_device_handle_t Device,
-    ol_kernel_handle_t Kernel, const void *ArgumentsData, size_t ArgumentsSize,
-    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation) {
-  currentCodeLocation() = CodeLocation;
-  ol_result_t Result =
-      ::olLaunchKernel(Queue, Device, Kernel, ArgumentsData, ArgumentsSize,
-                       LaunchSizeArgs, EventOut);
-
-  currentCodeLocation() = nullptr;
-  return Result;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 ol_impl_result_t olCreateProgram_val(ol_device_handle_t Device,
                                      const void *ProgData, size_t ProgDataSize,
@@ -1031,3 +959,75 @@ ol_result_t olGetKernelWithCodeLoc(ol_program_handle_t Program,
   currentCodeLocation() = nullptr;
   return Result;
 }
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t
+olLaunchKernel_val(ol_queue_handle_t Queue, ol_device_handle_t Device,
+                   ol_kernel_handle_t Kernel, const void *ArgumentsData,
+                   size_t ArgumentsSize,
+                   const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+                   ol_event_handle_t *EventOut) {
+  if (offloadConfig().ValidationEnabled) {
+    if (Queue == NULL && EventOut != NULL) {
+      return OL_ERRC_INVALID_ARGUMENT;
+    }
+
+    if (NULL == Device) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == Kernel) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == ArgumentsData) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+
+    if (NULL == LaunchSizeArgs) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return llvm::offload::olLaunchKernel_impl(Queue, Device, Kernel,
+                                            ArgumentsData, ArgumentsSize,
+                                            LaunchSizeArgs, EventOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
+    ol_queue_handle_t Queue, ol_device_handle_t Device,
+    ol_kernel_handle_t Kernel, const void *ArgumentsData, size_t ArgumentsSize,
+    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+    ol_event_handle_t *EventOut) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olLaunchKernel";
+  }
+
+  ol_result_t Result =
+      olLaunchKernel_val(Queue, Device, Kernel, ArgumentsData, ArgumentsSize,
+                         LaunchSizeArgs, EventOut);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_launch_kernel_params_t Params = {
+        &Queue,         &Device,         &Kernel,  &ArgumentsData,
+        &ArgumentsSize, &LaunchSizeArgs, &EventOut};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olLaunchKernelWithCodeLoc(
+    ol_queue_handle_t Queue, ol_device_handle_t Device,
+    ol_kernel_handle_t Kernel, const void *ArgumentsData, size_t ArgumentsSize,
+    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result =
+      ::olLaunchKernel(Queue, Device, Kernel, ArgumentsData, ArgumentsSize,
+                       LaunchSizeArgs, EventOut);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index f146b252a68ab..d488d4643c093 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -23,16 +23,16 @@ OFFLOAD_FUNC(olGetDeviceInfoSize)
 OFFLOAD_FUNC(olGetHostDevice)
 OFFLOAD_FUNC(olMemAlloc)
 OFFLOAD_FUNC(olMemFree)
+OFFLOAD_FUNC(olMemcpy)
 OFFLOAD_FUNC(olCreateQueue)
 OFFLOAD_FUNC(olDestroyQueue)
 OFFLOAD_FUNC(olWaitQueue)
 OFFLOAD_FUNC(olDestroyEvent)
 OFFLOAD_FUNC(olWaitEvent)
-OFFLOAD_FUNC(olMemcpy)
-OFFLOAD_FUNC(olLaunchKernel)
 OFFLOAD_FUNC(olCreateProgram)
 OFFLOAD_FUNC(olDestroyProgram)
 OFFLOAD_FUNC(olGetKernel)
+OFFLOAD_FUNC(olLaunchKernel)
 OFFLOAD_FUNC(olInitWithCodeLoc)
 OFFLOAD_FUNC(olShutDownWithCodeLoc)
 OFFLOAD_FUNC(olGetPlatformWithCodeLoc)
@@ -46,15 +46,15 @@ OFFLOAD_FUNC(olGetDeviceInfoSizeWithCodeLoc)
 OFFLOAD_FUNC(olGetHostDeviceWithCodeLoc)
 OFFLOAD_FUNC(olMemAllocWithCodeLoc)
 OFFLOAD_FUNC(olMemFreeWithCodeLoc)
+OFFLOAD_FUNC(olMemcpyWithCodeLoc)
 OFFLOAD_FUNC(olCreateQueueWithCodeLoc)
 OFFLOAD_FUNC(olDestroyQueueWithCodeLoc)
 OFFLOAD_FUNC(olWaitQueueWithCodeLoc)
 OFFLOAD_FUNC(olDestroyEventWithCodeLoc)
 OFFLOAD_FUNC(olWaitEventWithCodeLoc)
-OFFLOAD_FUNC(olMemcpyWithCodeLoc)
-OFFLOAD_FUNC(olLaunchKernelWithCodeLoc)
 OFFLOAD_FUNC(olCreateProgramWithCodeLoc)
 OFFLOAD_FUNC(olDestroyProgramWithCodeLoc)
 OFFLOAD_FUNC(olGetKernelWithCodeLoc)
+OFFLOAD_FUNC(olLaunchKernelWithCodeLoc)
 
 #undef OFFLOAD_FUNC
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 5d6d42f3a5f5b..4cf24b1e71fbb 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -45,6 +45,11 @@ ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
 
 ol_impl_result_t olMemFree_impl(void *Address);
 
+ol_impl_result_t olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
+                               ol_device_handle_t DstDevice, void *SrcPtr,
+                               ol_device_handle_t SrcDevice, size_t Size,
+                               ol_event_handle_t *EventOut);
+
 ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
                                     ol_queue_handle_t *Queue);
 
@@ -56,18 +61,6 @@ ol_impl_result_t olDestroyEvent_impl(ol_event_handle_t Event);
 
 ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event);
 
-ol_impl_result_t olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
-                               ol_device_handle_t DstDevice, void *SrcPtr,
-                               ol_device_handle_t SrcDevice, size_t Size,
-                               ol_event_handle_t *EventOut);
-
-ol_impl_result_t
-olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
-                    ol_kernel_handle_t Kernel, const void *ArgumentsData,
-                    size_t ArgumentsSize,
-                    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-                    ol_event_handle_t *EventOut);
-
 ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device,
                                       const void *ProgData, size_t ProgDataSize,
                                       ol_program_handle_t *Program);
@@ -77,3 +70,10 @@ ol_impl_result_t olDestroyProgram_impl(ol_program_handle_t Program);
 ol_impl_result_t olGetKernel_impl(ol_program_handle_t Program,
                                   const char *KernelName,
                                   ol_kernel_handle_t *Kernel);
+
+ol_impl_result_t
+olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
+                    ol_kernel_handle_t Kernel, const void *ArgumentsData,
+                    size_t ArgumentsSize,
+                    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+                    ol_event_handle_t *EventOut);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 04c8e09b75836..247d2095de9b2 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -516,6 +516,31 @@ operator<<(llvm::raw_ostream &os, const struct ol_mem_free_params_t *params) {
   return os;
 }
 
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                     const struct ol_memcpy_params_t *params) {
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  os << ", ";
+  os << ".DstPtr = ";
+  printPtr(os, *params->pDstPtr);
+  os << ", ";
+  os << ".DstDevice = ";
+  printPtr(os, *params->pDstDevice);
+  os << ", ";
+  os << ".SrcPtr = ";
+  printPtr(os, *params->pSrcPtr);
+  os << ", ";
+  os << ".SrcDevice = ";
+  printPtr(os, *params->pSrcDevice);
+  os << ", ";
+  os << ".Size = ";
+  os << *params->pSize;
+  os << ", ";
+  os << ".EventOut = ";
+  printPtr(os, *params->pEventOut);
+  return os;
+}
+
 inline llvm::raw_ostream &
 operator<<(llvm::raw_ostream &os,
            const struct ol_create_queue_params_t *params) {
@@ -557,57 +582,6 @@ operator<<(llvm::raw_ostream &os, const struct ol_wait_event_params_t *params) {
   return os;
 }
 
-inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
-                                     const struct ol_memcpy_params_t *params) {
-  os << ".Queue = ";
-  printPtr(os, *params->pQueue);
-  os << ", ";
-  os << ".DstPtr = ";
-  printPtr(os, *params->pDstPtr);
-  os << ", ";
-  os << ".DstDevice = ";
-  printPtr(os, *params->pDstDevice);
-  os << ", ";
-  os << ".SrcPtr = ";
-  printPtr(os, *params->pSrcPtr);
-  os << ", ";
-  os << ".SrcDevice = ";
-  printPtr(os, *params->pSrcDevice);
-  os << ", ";
-  os << ".Size = ";
-  os << *params->pSize;
-  os << ", ";
-  os << ".EventOut = ";
-  printPtr(os, *params->pEventOut);
-  return os;
-}
-
-inline llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os,
-           const struct ol_launch_kernel_params_t *params) {
-  os << ".Queue = ";
-  printPtr(os, *params->pQueue);
-  os << ", ";
-  os << ".Device = ";
-  printPtr(os, *params->pDevice);
-  os << ", ";
-  os << ".Kernel = ";
-  printPtr(os, *params->pKernel);
-  os << ", ";
-  os << ".ArgumentsData = ";
-  printPtr(os, *params->pArgumentsData);
-  os << ", ";
-  os << ".ArgumentsSize = ";
-  os << *params->pArgumentsSize;
-  os << ", ";
-  os << ".LaunchSizeArgs = ";
-  printPtr(os, *params->pLaunchSizeArgs);
-  os << ", ";
-  os << ".EventOut = ";
-  printPtr(os, *params->pEventOut);
-  return os;
-}
-
 inline llvm::raw_ostream &
 operator<<(llvm::raw_ostream &os,
            const struct ol_create_program_params_t *params) {
@@ -646,6 +620,32 @@ operator<<(llvm::raw_ostream &os, const struct ol_get_kernel_params_t *params) {
   return os;
 }
 
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_launch_kernel_params_t *params) {
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  os << ", ";
+  os << ".Device = ";
+  printPtr(os, *params->pDevice);
+  os << ", ";
+  os << ".Kernel = ";
+  printPtr(os, *params->pKernel);
+  os << ", ";
+  os << ".ArgumentsData = ";
+  printPtr(os, *params->pArgumentsData);
+  os << ", ";
+  os << ".ArgumentsSize = ";
+  os << *params->pArgumentsSize;
+  os << ", ";
+  os << ".LaunchSizeArgs = ";
+  printPtr(os, *params->pLaunchSizeArgs);
+  os << ", ";
+  os << ".EventOut = ";
+  printPtr(os, *params->pEventOut);
+  return os;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // @brief Print pointer value
 template <typename T>
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index a67b8bce0f968..08ca7d5640360 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -476,7 +476,6 @@ ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device,
   }
 
   Prog->Image = *Res;
-  // Prog->ImageData = std::move(ImageData);
   *Program = Prog;
 
   return OL_SUCCESS;

>From 833af6ef219df4a535f4acb64b4ec1678ecd234a Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Tue, 1 Apr 2025 13:49:58 +0100
Subject: [PATCH 8/9] Add dynamic shared memory arg for olLaunchKernel

---
 offload/liboffload/API/Kernel.td                  |  3 ++-
 offload/liboffload/include/generated/OffloadAPI.h | 15 ++++++++-------
 .../liboffload/include/generated/OffloadPrint.hpp |  3 +++
 offload/liboffload/src/OffloadImpl.cpp            |  1 +
 .../OffloadAPI/kernel/olLaunchKernel.cpp          |  2 ++
 5 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index 8f4349ed36f35..247f9c1bf5b6a 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -34,7 +34,8 @@ def : Struct {
         StructMember<"size_t", "NumGroupsZ", "Number of work groups on the Z dimension">,
         StructMember<"size_t", "GroupSizeX", "Size of a work group on the X dimension.">,
         StructMember<"size_t", "GroupSizeY", "Size of a work group on the Y dimension.">,
-        StructMember<"size_t", "GroupSizeZ", "Size of a work group on the Z dimension.">
+        StructMember<"size_t", "GroupSizeZ", "Size of a work group on the Z dimension.">,
+        StructMember<"size_t", "DynSharedMemory", "Size of dynamic shared memory in bytes.">
     ];
 }
 
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 5e8465e238924..8c304244391fb 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -736,13 +736,14 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetKernel(
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Size-related arguments for a kernel launch.
 typedef struct ol_kernel_launch_size_args_t {
-  size_t Dimensions; /// Number of work dimensions
-  size_t NumGroupsX; /// Number of work groups on the X dimension
-  size_t NumGroupsY; /// Number of work groups on the Y dimension
-  size_t NumGroupsZ; /// Number of work groups on the Z dimension
-  size_t GroupSizeX; /// Size of a work group on the X dimension.
-  size_t GroupSizeY; /// Size of a work group on the Y dimension.
-  size_t GroupSizeZ; /// Size of a work group on the Z dimension.
+  size_t Dimensions;      /// Number of work dimensions
+  size_t NumGroupsX;      /// Number of work groups on the X dimension
+  size_t NumGroupsY;      /// Number of work groups on the Y dimension
+  size_t NumGroupsZ;      /// Number of work groups on the Z dimension
+  size_t GroupSizeX;      /// Size of a work group on the X dimension.
+  size_t GroupSizeY;      /// Size of a work group on the Y dimension.
+  size_t GroupSizeZ;      /// Size of a work group on the Z dimension.
+  size_t DynSharedMemory; /// Size of dynamic shared memory in bytes.
 } ol_kernel_launch_size_args_t;
 
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 247d2095de9b2..cbfa74fc70394 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -362,6 +362,9 @@ operator<<(llvm::raw_ostream &os,
   os << ", ";
   os << ".GroupSizeZ = ";
   os << params.GroupSizeZ;
+  os << ", ";
+  os << ".DynSharedMemory = ";
+  os << params.DynSharedMemory;
   os << "}";
   return os;
 }
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 08ca7d5640360..0f0dba499b518 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -524,6 +524,7 @@ olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
   LaunchArgs.ThreadLimit[0] = LaunchSizeArgs->GroupSizeX;
   LaunchArgs.ThreadLimit[1] = LaunchSizeArgs->GroupSizeY;
   LaunchArgs.ThreadLimit[2] = LaunchSizeArgs->GroupSizeZ;
+  LaunchArgs.DynCGroupMem = LaunchSizeArgs->DynSharedMemory;
 
   KernelLaunchParamsTy Params;
   Params.Data = const_cast<void *>(ArgumentsData);
diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
index 96df393619743..0f71fd6e7f3dc 100644
--- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
@@ -26,6 +26,8 @@ struct olLaunchKernelTest : OffloadQueueTest {
     LaunchArgs.NumGroupsX = 1;
     LaunchArgs.NumGroupsY = 1;
     LaunchArgs.NumGroupsZ = 1;
+
+    LaunchArgs.DynSharedMemory = 0;
   }
 
   void TearDown() override {

>From a8a901feed65cafe74a9e2119b21dcb1e9a97da5 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Tue, 15 Apr 2025 15:22:19 +0100
Subject: [PATCH 9/9] Implement filtered device/platform discovery, demote
 platforms

---
 offload/liboffload/API/APIDefs.td             |   2 +-
 offload/liboffload/API/Device.td              |  60 ++++-
 offload/liboffload/API/Platform.td            |  34 ---
 .../liboffload/include/generated/OffloadAPI.h | 213 +++++++++--------
 .../include/generated/OffloadEntryPoints.inc  | 221 +++++++++---------
 .../include/generated/OffloadFuncs.inc        |  12 +-
 .../generated/OffloadImplFuncDecls.inc        |  21 +-
 .../include/generated/OffloadPrint.hpp        |  69 +++---
 offload/liboffload/src/OffloadImpl.cpp        | 149 +++++++-----
 offload/tools/offload-tblgen/APIGen.cpp       |  15 ++
 offload/tools/offload-tblgen/PrintGen.cpp     |   3 +
 offload/tools/offload-tblgen/RecordTypes.hpp  |  18 ++
 offload/unittests/OffloadAPI/CMakeLists.txt   |   2 -
 .../OffloadAPI/common/Environment.cpp         |  97 ++++----
 .../OffloadAPI/common/Environment.hpp         |   7 +-
 .../unittests/OffloadAPI/common/Fixtures.hpp  |  25 +-
 .../OffloadAPI/device/olGetDevice.cpp         |  15 +-
 .../OffloadAPI/device/olGetDeviceCount.cpp    |  12 +-
 .../OffloadAPI/device/olGetDeviceInfo.cpp     |   2 +-
 .../OffloadAPI/kernel/olLaunchKernel.cpp      |   2 +-
 .../OffloadAPI/platform/olGetPlatform.cpp     |  28 ---
 .../platform/olGetPlatformCount.cpp           |  22 --
 .../OffloadAPI/program/olCreateProgram.cpp    |   2 +-
 23 files changed, 542 insertions(+), 489 deletions(-)
 delete mode 100644 offload/unittests/OffloadAPI/platform/olGetPlatform.cpp
 delete mode 100644 offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp

diff --git a/offload/liboffload/API/APIDefs.td b/offload/liboffload/API/APIDefs.td
index cee4adea1d9f6..640932dcf8464 100644
--- a/offload/liboffload/API/APIDefs.td
+++ b/offload/liboffload/API/APIDefs.td
@@ -199,7 +199,7 @@ class Typedef : APIObject { string value; }
 
 class FptrTypedef : APIObject {
   list<Param> params;
-  list<Return> returns;
+  string return;
 }
 
 class Macro : APIObject {
diff --git a/offload/liboffload/API/Device.td b/offload/liboffload/API/Device.td
index 36fbdcfd05153..333f6bdf41a5b 100644
--- a/offload/liboffload/API/Device.td
+++ b/offload/liboffload/API/Device.td
@@ -36,22 +36,20 @@ def : Enum {
 
 def : Function {
   let name = "olGetDeviceCount";
-  let desc = "Retrieves the number of available devices within a platform.";
+  let desc = "Retrieves the number of available devices.";
   let params = [
-    Param<"ol_platform_handle_t", "Platform", "handle of the platform instance", PARAM_IN>,
     Param<"uint32_t*", "NumDevices", "pointer to the number of devices.", PARAM_OUT>
   ];
   let returns = [];
 }
 
 def : Function {
-  let name = "olGetDevice";
-  let desc = "Retrieves devices within a platform.";
+  let name = "olGetDevices";
+  let desc = "Retrieves devices.";
   let details = [
     "Multiple calls to this function will return identical device handles, in the same order.",
   ];
   let params = [
-    Param<"ol_platform_handle_t", "Platform", "handle of the platform instance", PARAM_IN>,
     Param<"uint32_t", "NumEntries", "the number of devices to be added to phDevices, which must be greater than zero", PARAM_IN>,
     RangedParam<"ol_device_handle_t*", "Devices", "Array of device handles. "
         "If NumEntries is less than the number of devices available, then this function shall only retrieve that number of devices.", PARAM_OUT,
@@ -64,6 +62,58 @@ def : Function {
   ];
 }
 
+def : FptrTypedef {
+  let name = "ol_platform_filter_cb_t";
+  let desc = "User-provided function to determine whether a platform is selected.";
+  let params = [
+    Param<"ol_platform_backend_t", "Backend", "the backend of the platform which is selected for filtering", PARAM_IN>,
+    Param<"const char*", "Name", "the name of the platform which is selected for filtering", PARAM_IN>
+  ];
+  let return = "bool";
+}
+
+def : FptrTypedef {
+  let name = "ol_device_filter_cb_t";
+  let desc = "User-provided function to determine whether a device is selected.";
+  let params = [
+    Param<"ol_device_type_t", "Type", "the type of the device which is selected for filtering", PARAM_IN>
+  ];
+  let return = "bool";
+}
+
+def : Function {
+  let name = "olGetFilteredDevices";
+  let desc = "Retrieve a subset of the available devices";
+  let details = [
+    "Platforms and devices are lazily initialized when they are first filtered",
+    "Use MaxNumDevices to stop device and platform discovery after a fixed number of devices",
+    "Multiple calls to this function will return identical device handles, in the same order.",
+  ];
+  let params = [
+    Param<"uint32_t", "MaxNumDevices", "the maximum number of devices to be added to phDevices, which must be greater than zero", PARAM_IN>,
+    Param<"ol_platform_filter_cb_t", "PlatformFilter", "the callback used to decide whether a platform is included", PARAM_IN>,
+    Param<"ol_device_filter_cb_t", "DeviceFilter", "the callback used to decide whether a device is included", PARAM_IN>,
+    Param<"ol_device_handle_t*", "FilteredDevices", "output pointer for the selected devices", PARAM_OUT>,
+  ];
+  let returns = [];
+}
+
+def : Function {
+  let name = "olGetFilteredDevicesCount";
+  let desc = "Retrieve the number of devices that would be returned by olGetFilteredDevices with the given filters.";
+  let details = [
+      "Platforms and devices are lazily initialized when they are first filtered",
+      "Use MaxNumDevices to stop device and platform discovery after a fixed number of devices",
+  ];
+  let params = [
+    Param<"uint32_t", "MaxNumDevices", "the maximum number of devices to be added to phDevices; a value of 0 implies no limit.", PARAM_IN>,
+    Param<"ol_platform_filter_cb_t", "PlatformFilter", "the callback used to decide whether a platform is included", PARAM_IN>,
+    Param<"ol_device_filter_cb_t", "DeviceFilter", "the callback used to decide whether a device is included", PARAM_IN>,
+    Param<"uint32_t*", "NumFilteredDevices", "output pointer for the number of selected devices", PARAM_OUT>
+  ];
+  let returns = [];
+}
+
 def : Function {
   let name = "olGetDeviceInfo";
   let desc = "Queries the given property of the device.";
diff --git a/offload/liboffload/API/Platform.td b/offload/liboffload/API/Platform.td
index f2a09fa41a338..f6480c4f2f64d 100644
--- a/offload/liboffload/API/Platform.td
+++ b/offload/liboffload/API/Platform.td
@@ -9,40 +9,6 @@
 // This file contains Offload API definitions related to the Platform handle
 //
 //===----------------------------------------------------------------------===//
-def : Function {
-  let name = "olGetPlatform";
-  let desc = "Retrieves all available platforms.";
-  let details = [
-    "Multiple calls to this function will return identical platforms handles, in the same order.",
-  ];
-  let params = [
-    Param<"uint32_t", "NumEntries",
-      "The number of platforms to be added to Platforms. NumEntries must be "
-      "greater than zero.",
-      PARAM_IN>,
-    RangedParam<"ol_platform_handle_t*", "Platforms", 
-      "Array of handle of platforms. If NumEntries is less than the number of "
-      "platforms available, then olGetPlatform shall only retrieve that "
-      "number of platforms.",
-      PARAM_OUT, Range<"0", "NumEntries">>
-  ];
-  let returns = [
-    Return<"OL_ERRC_INVALID_SIZE", [
-      "`NumEntries == 0`"
-    ]>
-  ];
-}
-
-def : Function {
-  let name = "olGetPlatformCount";
-  let desc = "Retrieves the number of available platforms.";
-  let params = [
-    Param<"uint32_t*",
-      "NumPlatforms", "returns the total number of platforms available.",
-      PARAM_OUT>
-  ];
-  let returns = [];
-}
 
 def : Enum {
   let name = "ol_platform_info_t";
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 8c304244391fb..383853afc8897 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -201,47 +201,6 @@ OL_APIEXPORT ol_result_t OL_APICALL olInit();
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
 OL_APIEXPORT ol_result_t OL_APICALL olShutDown();
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Retrieves all available platforms.
-///
-/// @details
-///    - Multiple calls to this function will return identical platforms
-///    handles, in the same order.
-///
-/// @returns
-///     - ::OL_RESULT_SUCCESS
-///     - ::OL_ERRC_UNINITIALIZED
-///     - ::OL_ERRC_DEVICE_LOST
-///     - ::OL_ERRC_INVALID_SIZE
-///         + `NumEntries == 0`
-///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///     - ::OL_ERRC_INVALID_NULL_POINTER
-///         + `NULL == Platforms`
-OL_APIEXPORT ol_result_t OL_APICALL olGetPlatform(
-    // [in] The number of platforms to be added to Platforms. NumEntries must be
-    // greater than zero.
-    uint32_t NumEntries,
-    // [out] Array of handle of platforms. If NumEntries is less than the number
-    // of platforms available, then olGetPlatform shall only retrieve that
-    // number of platforms.
-    ol_platform_handle_t *Platforms);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Retrieves the number of available platforms.
-///
-/// @details
-///
-/// @returns
-///     - ::OL_RESULT_SUCCESS
-///     - ::OL_ERRC_UNINITIALIZED
-///     - ::OL_ERRC_DEVICE_LOST
-///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///     - ::OL_ERRC_INVALID_NULL_POINTER
-///         + `NULL == NumPlatforms`
-OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCount(
-    // [out] returns the total number of platforms available.
-    uint32_t *NumPlatforms);
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Supported platform info.
 typedef enum ol_platform_info_t {
@@ -372,7 +331,7 @@ typedef enum ol_device_info_t {
 } ol_device_info_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Retrieves the number of available devices within a platform.
+/// @brief Retrieves the number of available devices.
 ///
 /// @details
 ///
@@ -381,17 +340,14 @@ typedef enum ol_device_info_t {
 ///     - ::OL_ERRC_UNINITIALIZED
 ///     - ::OL_ERRC_DEVICE_LOST
 ///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == Platform`
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
 ///         + `NULL == NumDevices`
 OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceCount(
-    // [in] handle of the platform instance
-    ol_platform_handle_t Platform,
     // [out] pointer to the number of devices.
     uint32_t *NumDevices);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Retrieves devices within a platform.
+/// @brief Retrieves devices.
 ///
 /// @details
 ///    - Multiple calls to this function will return identical device handles,
@@ -404,12 +360,9 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceCount(
 ///     - ::OL_ERRC_INVALID_SIZE
 ///         + `NumEntries == 0`
 ///     - ::OL_ERRC_INVALID_NULL_HANDLE
-///         + `NULL == Platform`
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
 ///         + `NULL == Devices`
-OL_APIEXPORT ol_result_t OL_APICALL olGetDevice(
-    // [in] handle of the platform instance
-    ol_platform_handle_t Platform,
+OL_APIEXPORT ol_result_t OL_APICALL olGetDevices(
     // [in] the number of devices to be added to phDevices, which must be
     // greater than zero
     uint32_t NumEntries,
@@ -418,6 +371,77 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDevice(
     // devices.
     ol_device_handle_t *Devices);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief User-provided function to determine whether a platform is selected.
+typedef bool (*ol_platform_filter_cb_t)(
+    // the backend of the platform which is selected for filtering
+    ol_platform_backend_t Backend,
+    // the name of the platform which is selected for filtering
+    const char *Name);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief User-provided function to determine whether a device is selected.
+typedef bool (*ol_device_filter_cb_t)(
+    // the type of the device which is selected for filtering
+    ol_device_type_t Type);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Retrieve a subset of the available devices
+///
+/// @details
+///    - Platforms and devices are lazily initialized when they are first
+///    filtered
+///    - Use MaxNumDevices to stop device and platform discovery after a fixed
+///    number of devices
+///    - Multiple calls to this function will return identical device handles,
+///    in the same order.
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == FilteredDevices`
+OL_APIEXPORT ol_result_t OL_APICALL olGetFilteredDevices(
+    // [in] the maximum number of devices to be added to phDevices, which must
+    // be greater than zero
+    uint32_t MaxNumDevices,
+    // [in] the callback used to decide whether a platform is included
+    ol_platform_filter_cb_t PlatformFilter,
+    // [in] the callback used to decide whether a device is included
+    ol_device_filter_cb_t DeviceFilter,
+    // [out] output pointer for the selected devices
+    ol_device_handle_t *FilteredDevices);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Retrieve the number of devices that would be returned by
+/// olGetFilteredDevices with the given filters.
+///
+/// @details
+///    - Platforms and devices are lazily initialized when they are first
+///    filtered
+///    - Use MaxNumDevices to stop device and platform discovery after a fixed
+///    number of devices
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == NumFilteredDevices`
+OL_APIEXPORT ol_result_t OL_APICALL olGetFilteredDevicesCount(
+    // [in] the maximum number of devices to be added to phDevices; a value of 0
+    // implies no limit.
+    uint32_t MaxNumDevices,
+    // [in] the callback used to decide whether a platform is included
+    ol_platform_filter_cb_t PlatformFilter,
+    // [in] the callback used to decide whether a device is included
+    ol_device_filter_cb_t DeviceFilter,
+    // [out] output pointer for the number of selected devices
+    uint32_t *NumFilteredDevices);
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Queries the given property of the device.
 ///
@@ -782,21 +806,6 @@ OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
     // [out][optional] optional recorded event for the enqueued operation
     ol_event_handle_t *EventOut);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olGetPlatform
-/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_get_platform_params_t {
-  uint32_t *pNumEntries;
-  ol_platform_handle_t **pPlatforms;
-} ol_get_platform_params_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olGetPlatformCount
-/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_get_platform_count_params_t {
-  uint32_t **pNumPlatforms;
-} ol_get_platform_count_params_t;
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olGetPlatformInfo
 /// @details Each entry is a pointer to the parameter passed to the function;
@@ -820,18 +829,36 @@ typedef struct ol_get_platform_info_size_params_t {
 /// @brief Function parameters for olGetDeviceCount
 /// @details Each entry is a pointer to the parameter passed to the function;
 typedef struct ol_get_device_count_params_t {
-  ol_platform_handle_t *pPlatform;
   uint32_t **pNumDevices;
 } ol_get_device_count_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olGetDevice
+/// @brief Function parameters for olGetDevices
 /// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_get_device_params_t {
-  ol_platform_handle_t *pPlatform;
+typedef struct ol_get_devices_params_t {
   uint32_t *pNumEntries;
   ol_device_handle_t **pDevices;
-} ol_get_device_params_t;
+} ol_get_devices_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olGetFilteredDevices
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_get_filtered_devices_params_t {
+  uint32_t *pMaxNumDevices;
+  ol_platform_filter_cb_t *pPlatformFilter;
+  ol_device_filter_cb_t *pDeviceFilter;
+  ol_device_handle_t **pFilteredDevices;
+} ol_get_filtered_devices_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olGetFilteredDevicesCount
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_get_filtered_devices_count_params_t {
+  uint32_t *pMaxNumDevices;
+  ol_platform_filter_cb_t *pPlatformFilter;
+  ol_device_filter_cb_t *pDeviceFilter;
+  uint32_t **pNumFilteredDevices;
+} ol_get_filtered_devices_count_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olGetDeviceInfo
@@ -976,21 +1003,6 @@ olInitWithCodeLoc(ol_code_location_t *CodeLocation);
 OL_APIEXPORT ol_result_t OL_APICALL
 olShutDownWithCodeLoc(ol_code_location_t *CodeLocation);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olGetPlatform that also sets source code location
-/// information
-/// @details See also ::olGetPlatform
-OL_APIEXPORT ol_result_t OL_APICALL
-olGetPlatformWithCodeLoc(uint32_t NumEntries, ol_platform_handle_t *Platforms,
-                         ol_code_location_t *CodeLocation);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olGetPlatformCount that also sets source code location
-/// information
-/// @details See also ::olGetPlatformCount
-OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCountWithCodeLoc(
-    uint32_t *NumPlatforms, ol_code_location_t *CodeLocation);
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Variant of olGetPlatformInfo that also sets source code location
 /// information
@@ -1011,17 +1023,34 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformInfoSizeWithCodeLoc(
 /// @brief Variant of olGetDeviceCount that also sets source code location
 /// information
 /// @details See also ::olGetDeviceCount
+OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceCountWithCodeLoc(
+    uint32_t *NumDevices, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olGetDevices that also sets source code location
+/// information
+/// @details See also ::olGetDevices
 OL_APIEXPORT ol_result_t OL_APICALL
-olGetDeviceCountWithCodeLoc(ol_platform_handle_t Platform, uint32_t *NumDevices,
-                            ol_code_location_t *CodeLocation);
+olGetDevicesWithCodeLoc(uint32_t NumEntries, ol_device_handle_t *Devices,
+                        ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olGetDevice that also sets source code location
+/// @brief Variant of olGetFilteredDevices that also sets source code location
 /// information
-/// @details See also ::olGetDevice
-OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceWithCodeLoc(
-    ol_platform_handle_t Platform, uint32_t NumEntries,
-    ol_device_handle_t *Devices, ol_code_location_t *CodeLocation);
+/// @details See also ::olGetFilteredDevices
+OL_APIEXPORT ol_result_t OL_APICALL olGetFilteredDevicesWithCodeLoc(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, ol_device_handle_t *FilteredDevices,
+    ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olGetFilteredDevicesCount that also sets source code
+/// location information
+/// @details See also ::olGetFilteredDevicesCount
+OL_APIEXPORT ol_result_t OL_APICALL olGetFilteredDevicesCountWithCodeLoc(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, uint32_t *NumFilteredDevices,
+    ol_code_location_t *CodeLocation);
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Variant of olGetDeviceInfo that also sets source code location
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index a02d734bc5773..0ba72e7b0a1b6 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -68,85 +68,6 @@ ol_result_t olShutDownWithCodeLoc(ol_code_location_t *CodeLocation) {
   return Result;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olGetPlatform_val(uint32_t NumEntries,
-                                   ol_platform_handle_t *Platforms) {
-  if (offloadConfig().ValidationEnabled) {
-    if (NumEntries == 0) {
-      return OL_ERRC_INVALID_SIZE;
-    }
-
-    if (NULL == Platforms) {
-      return OL_ERRC_INVALID_NULL_POINTER;
-    }
-  }
-
-  return llvm::offload::olGetPlatform_impl(NumEntries, Platforms);
-}
-OL_APIEXPORT ol_result_t OL_APICALL
-olGetPlatform(uint32_t NumEntries, ol_platform_handle_t *Platforms) {
-  if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olGetPlatform";
-  }
-
-  ol_result_t Result = olGetPlatform_val(NumEntries, Platforms);
-
-  if (offloadConfig().TracingEnabled) {
-    ol_get_platform_params_t Params = {&NumEntries, &Platforms};
-    llvm::errs() << "(" << &Params << ")";
-    llvm::errs() << "-> " << Result << "\n";
-    if (Result && Result->Details) {
-      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
-    }
-  }
-  return Result;
-}
-ol_result_t olGetPlatformWithCodeLoc(uint32_t NumEntries,
-                                     ol_platform_handle_t *Platforms,
-                                     ol_code_location_t *CodeLocation) {
-  currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olGetPlatform(NumEntries, Platforms);
-
-  currentCodeLocation() = nullptr;
-  return Result;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olGetPlatformCount_val(uint32_t *NumPlatforms) {
-  if (offloadConfig().ValidationEnabled) {
-    if (NULL == NumPlatforms) {
-      return OL_ERRC_INVALID_NULL_POINTER;
-    }
-  }
-
-  return llvm::offload::olGetPlatformCount_impl(NumPlatforms);
-}
-OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCount(uint32_t *NumPlatforms) {
-  if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olGetPlatformCount";
-  }
-
-  ol_result_t Result = olGetPlatformCount_val(NumPlatforms);
-
-  if (offloadConfig().TracingEnabled) {
-    ol_get_platform_count_params_t Params = {&NumPlatforms};
-    llvm::errs() << "(" << &Params << ")";
-    llvm::errs() << "-> " << Result << "\n";
-    if (Result && Result->Details) {
-      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
-    }
-  }
-  return Result;
-}
-ol_result_t olGetPlatformCountWithCodeLoc(uint32_t *NumPlatforms,
-                                          ol_code_location_t *CodeLocation) {
-  currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olGetPlatformCount(NumPlatforms);
-
-  currentCodeLocation() = nullptr;
-  return Result;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 ol_impl_result_t olGetPlatformInfo_val(ol_platform_handle_t Platform,
                                        ol_platform_info_t PropName,
@@ -251,30 +172,24 @@ ol_result_t olGetPlatformInfoSizeWithCodeLoc(ol_platform_handle_t Platform,
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olGetDeviceCount_val(ol_platform_handle_t Platform,
-                                      uint32_t *NumDevices) {
+ol_impl_result_t olGetDeviceCount_val(uint32_t *NumDevices) {
   if (offloadConfig().ValidationEnabled) {
-    if (NULL == Platform) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
-    }
-
     if (NULL == NumDevices) {
       return OL_ERRC_INVALID_NULL_POINTER;
     }
   }
 
-  return llvm::offload::olGetDeviceCount_impl(Platform, NumDevices);
+  return llvm::offload::olGetDeviceCount_impl(NumDevices);
 }
-OL_APIEXPORT ol_result_t OL_APICALL
-olGetDeviceCount(ol_platform_handle_t Platform, uint32_t *NumDevices) {
+OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceCount(uint32_t *NumDevices) {
   if (offloadConfig().TracingEnabled) {
     llvm::errs() << "---> olGetDeviceCount";
   }
 
-  ol_result_t Result = olGetDeviceCount_val(Platform, NumDevices);
+  ol_result_t Result = olGetDeviceCount_val(NumDevices);
 
   if (offloadConfig().TracingEnabled) {
-    ol_get_device_count_params_t Params = {&Platform, &NumDevices};
+    ol_get_device_count_params_t Params = {&NumDevices};
     llvm::errs() << "(" << &Params << ")";
     llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
@@ -283,47 +198,84 @@ olGetDeviceCount(ol_platform_handle_t Platform, uint32_t *NumDevices) {
   }
   return Result;
 }
-ol_result_t olGetDeviceCountWithCodeLoc(ol_platform_handle_t Platform,
-                                        uint32_t *NumDevices,
+ol_result_t olGetDeviceCountWithCodeLoc(uint32_t *NumDevices,
                                         ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olGetDeviceCount(Platform, NumDevices);
+  ol_result_t Result = ::olGetDeviceCount(NumDevices);
 
   currentCodeLocation() = nullptr;
   return Result;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olGetDevice_val(ol_platform_handle_t Platform,
-                                 uint32_t NumEntries,
-                                 ol_device_handle_t *Devices) {
+ol_impl_result_t olGetDevices_val(uint32_t NumEntries,
+                                  ol_device_handle_t *Devices) {
   if (offloadConfig().ValidationEnabled) {
     if (NumEntries == 0) {
       return OL_ERRC_INVALID_SIZE;
     }
 
-    if (NULL == Platform) {
-      return OL_ERRC_INVALID_NULL_HANDLE;
+    if (NULL == Devices) {
+      return OL_ERRC_INVALID_NULL_POINTER;
     }
+  }
 
-    if (NULL == Devices) {
+  return llvm::offload::olGetDevices_impl(NumEntries, Devices);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olGetDevices(uint32_t NumEntries,
+                                                 ol_device_handle_t *Devices) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olGetDevices";
+  }
+
+  ol_result_t Result = olGetDevices_val(NumEntries, Devices);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_get_devices_params_t Params = {&NumEntries, &Devices};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olGetDevicesWithCodeLoc(uint32_t NumEntries,
+                                    ol_device_handle_t *Devices,
+                                    ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olGetDevices(NumEntries, Devices);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olGetFilteredDevices_val(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, ol_device_handle_t *FilteredDevices) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == FilteredDevices) {
       return OL_ERRC_INVALID_NULL_POINTER;
     }
   }
 
-  return llvm::offload::olGetDevice_impl(Platform, NumEntries, Devices);
+  return llvm::offload::olGetFilteredDevices_impl(
+      MaxNumDevices, PlatformFilter, DeviceFilter, FilteredDevices);
 }
-OL_APIEXPORT ol_result_t OL_APICALL olGetDevice(ol_platform_handle_t Platform,
-                                                uint32_t NumEntries,
-                                                ol_device_handle_t *Devices) {
+OL_APIEXPORT ol_result_t OL_APICALL olGetFilteredDevices(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, ol_device_handle_t *FilteredDevices) {
   if (offloadConfig().TracingEnabled) {
-    llvm::errs() << "---> olGetDevice";
+    llvm::errs() << "---> olGetFilteredDevices";
   }
 
-  ol_result_t Result = olGetDevice_val(Platform, NumEntries, Devices);
+  ol_result_t Result = olGetFilteredDevices_val(MaxNumDevices, PlatformFilter,
+                                                DeviceFilter, FilteredDevices);
 
   if (offloadConfig().TracingEnabled) {
-    ol_get_device_params_t Params = {&Platform, &NumEntries, &Devices};
+    ol_get_filtered_devices_params_t Params = {&MaxNumDevices, &PlatformFilter,
+                                               &DeviceFilter, &FilteredDevices};
     llvm::errs() << "(" << &Params << ")";
     llvm::errs() << "-> " << Result << "\n";
     if (Result && Result->Details) {
@@ -332,12 +284,59 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDevice(ol_platform_handle_t Platform,
   }
   return Result;
 }
-ol_result_t olGetDeviceWithCodeLoc(ol_platform_handle_t Platform,
-                                   uint32_t NumEntries,
-                                   ol_device_handle_t *Devices,
-                                   ol_code_location_t *CodeLocation) {
+ol_result_t olGetFilteredDevicesWithCodeLoc(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, ol_device_handle_t *FilteredDevices,
+    ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = ::olGetFilteredDevices(MaxNumDevices, PlatformFilter,
+                                              DeviceFilter, FilteredDevices);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olGetFilteredDevicesCount_val(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, uint32_t *NumFilteredDevices) {
+  if (offloadConfig().ValidationEnabled) {
+    if (NULL == NumFilteredDevices) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return llvm::offload::olGetFilteredDevicesCount_impl(
+      MaxNumDevices, PlatformFilter, DeviceFilter, NumFilteredDevices);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olGetFilteredDevicesCount(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, uint32_t *NumFilteredDevices) {
+  if (offloadConfig().TracingEnabled) {
+    llvm::errs() << "---> olGetFilteredDevicesCount";
+  }
+
+  ol_result_t Result = olGetFilteredDevicesCount_val(
+      MaxNumDevices, PlatformFilter, DeviceFilter, NumFilteredDevices);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_get_filtered_devices_count_params_t Params = {
+        &MaxNumDevices, &PlatformFilter, &DeviceFilter, &NumFilteredDevices};
+    llvm::errs() << "(" << &Params << ")";
+    llvm::errs() << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      llvm::errs() << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olGetFilteredDevicesCountWithCodeLoc(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, uint32_t *NumFilteredDevices,
+    ol_code_location_t *CodeLocation) {
   currentCodeLocation() = CodeLocation;
-  ol_result_t Result = ::olGetDevice(Platform, NumEntries, Devices);
+  ol_result_t Result = ::olGetFilteredDevicesCount(
+      MaxNumDevices, PlatformFilter, DeviceFilter, NumFilteredDevices);
 
   currentCodeLocation() = nullptr;
   return Result;
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index d488d4643c093..aee0ef959a2d6 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -12,12 +12,12 @@
 
 OFFLOAD_FUNC(olInit)
 OFFLOAD_FUNC(olShutDown)
-OFFLOAD_FUNC(olGetPlatform)
-OFFLOAD_FUNC(olGetPlatformCount)
 OFFLOAD_FUNC(olGetPlatformInfo)
 OFFLOAD_FUNC(olGetPlatformInfoSize)
 OFFLOAD_FUNC(olGetDeviceCount)
-OFFLOAD_FUNC(olGetDevice)
+OFFLOAD_FUNC(olGetDevices)
+OFFLOAD_FUNC(olGetFilteredDevices)
+OFFLOAD_FUNC(olGetFilteredDevicesCount)
 OFFLOAD_FUNC(olGetDeviceInfo)
 OFFLOAD_FUNC(olGetDeviceInfoSize)
 OFFLOAD_FUNC(olGetHostDevice)
@@ -35,12 +35,12 @@ OFFLOAD_FUNC(olGetKernel)
 OFFLOAD_FUNC(olLaunchKernel)
 OFFLOAD_FUNC(olInitWithCodeLoc)
 OFFLOAD_FUNC(olShutDownWithCodeLoc)
-OFFLOAD_FUNC(olGetPlatformWithCodeLoc)
-OFFLOAD_FUNC(olGetPlatformCountWithCodeLoc)
 OFFLOAD_FUNC(olGetPlatformInfoWithCodeLoc)
 OFFLOAD_FUNC(olGetPlatformInfoSizeWithCodeLoc)
 OFFLOAD_FUNC(olGetDeviceCountWithCodeLoc)
-OFFLOAD_FUNC(olGetDeviceWithCodeLoc)
+OFFLOAD_FUNC(olGetDevicesWithCodeLoc)
+OFFLOAD_FUNC(olGetFilteredDevicesWithCodeLoc)
+OFFLOAD_FUNC(olGetFilteredDevicesCountWithCodeLoc)
 OFFLOAD_FUNC(olGetDeviceInfoWithCodeLoc)
 OFFLOAD_FUNC(olGetDeviceInfoSizeWithCodeLoc)
 OFFLOAD_FUNC(olGetHostDeviceWithCodeLoc)
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 4cf24b1e71fbb..6b2f92684daf7 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -9,11 +9,6 @@ ol_impl_result_t olInit_impl();
 
 ol_impl_result_t olShutDown_impl();
 
-ol_impl_result_t olGetPlatform_impl(uint32_t NumEntries,
-                                    ol_platform_handle_t *Platforms);
-
-ol_impl_result_t olGetPlatformCount_impl(uint32_t *NumPlatforms);
-
 ol_impl_result_t olGetPlatformInfo_impl(ol_platform_handle_t Platform,
                                         ol_platform_info_t PropName,
                                         size_t PropSize, void *PropValue);
@@ -22,12 +17,18 @@ ol_impl_result_t olGetPlatformInfoSize_impl(ol_platform_handle_t Platform,
                                             ol_platform_info_t PropName,
                                             size_t *PropSizeRet);
 
-ol_impl_result_t olGetDeviceCount_impl(ol_platform_handle_t Platform,
-                                       uint32_t *NumDevices);
+ol_impl_result_t olGetDeviceCount_impl(uint32_t *NumDevices);
+
+ol_impl_result_t olGetDevices_impl(uint32_t NumEntries,
+                                   ol_device_handle_t *Devices);
+
+ol_impl_result_t olGetFilteredDevices_impl(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, ol_device_handle_t *FilteredDevices);
 
-ol_impl_result_t olGetDevice_impl(ol_platform_handle_t Platform,
-                                  uint32_t NumEntries,
-                                  ol_device_handle_t *Devices);
+ol_impl_result_t olGetFilteredDevicesCount_impl(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, uint32_t *NumFilteredDevices);
 
 ol_impl_result_t olGetDeviceInfo_impl(ol_device_handle_t Device,
                                       ol_device_info_t PropName,
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index cbfa74fc70394..5599b3b03b553 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -369,32 +369,6 @@ operator<<(llvm::raw_ostream &os,
   return os;
 }
 
-inline llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os,
-           const struct ol_get_platform_params_t *params) {
-  os << ".NumEntries = ";
-  os << *params->pNumEntries;
-  os << ", ";
-  os << ".Platforms = ";
-  os << "{";
-  for (size_t i = 0; i < *params->pNumEntries; i++) {
-    if (i > 0) {
-      os << ", ";
-    }
-    printPtr(os, (*params->pPlatforms)[i]);
-  }
-  os << "}";
-  return os;
-}
-
-inline llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os,
-           const struct ol_get_platform_count_params_t *params) {
-  os << ".NumPlatforms = ";
-  printPtr(os, *params->pNumPlatforms);
-  return os;
-}
-
 inline llvm::raw_ostream &
 operator<<(llvm::raw_ostream &os,
            const struct ol_get_platform_info_params_t *params) {
@@ -429,19 +403,14 @@ operator<<(llvm::raw_ostream &os,
 inline llvm::raw_ostream &
 operator<<(llvm::raw_ostream &os,
            const struct ol_get_device_count_params_t *params) {
-  os << ".Platform = ";
-  printPtr(os, *params->pPlatform);
-  os << ", ";
   os << ".NumDevices = ";
   printPtr(os, *params->pNumDevices);
   return os;
 }
 
 inline llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os, const struct ol_get_device_params_t *params) {
-  os << ".Platform = ";
-  printPtr(os, *params->pPlatform);
-  os << ", ";
+operator<<(llvm::raw_ostream &os,
+           const struct ol_get_devices_params_t *params) {
   os << ".NumEntries = ";
   os << *params->pNumEntries;
   os << ", ";
@@ -457,6 +426,40 @@ operator<<(llvm::raw_ostream &os, const struct ol_get_device_params_t *params) {
   return os;
 }
 
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_get_filtered_devices_params_t *params) {
+  os << ".MaxNumDevices = ";
+  os << *params->pMaxNumDevices;
+  os << ", ";
+  os << ".PlatformFilter = ";
+  os << reinterpret_cast<void *>(*params->pPlatformFilter);
+  os << ", ";
+  os << ".DeviceFilter = ";
+  os << reinterpret_cast<void *>(*params->pDeviceFilter);
+  os << ", ";
+  os << ".FilteredDevices = ";
+  printPtr(os, *params->pFilteredDevices);
+  return os;
+}
+
+inline llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const struct ol_get_filtered_devices_count_params_t *params) {
+  os << ".MaxNumDevices = ";
+  os << *params->pMaxNumDevices;
+  os << ", ";
+  os << ".PlatformFilter = ";
+  os << reinterpret_cast<void *>(*params->pPlatformFilter);
+  os << ", ";
+  os << ".DeviceFilter = ";
+  os << reinterpret_cast<void *>(*params->pDeviceFilter);
+  os << ", ";
+  os << ".NumFilteredDevices = ";
+  printPtr(os, *params->pNumFilteredDevices);
+  return os;
+}
+
 inline llvm::raw_ostream &
 operator<<(llvm::raw_ostream &os,
            const struct ol_get_device_info_params_t *params) {
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 0f0dba499b518..33550bd4ca19e 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -51,10 +51,12 @@ struct ol_device_impl_t {
 
 struct ol_platform_impl_t {
   ol_platform_impl_t(std::unique_ptr<GenericPluginTy> Plugin,
-                     std::vector<ol_device_impl_t> Devices)
-      : Plugin(std::move(Plugin)), Devices(Devices) {}
+                     std::vector<ol_device_impl_t> Devices,
+                     ol_platform_backend_t BackendType)
+      : Plugin(std::move(Plugin)), Devices(Devices), BackendType(BackendType) {}
   std::unique_ptr<GenericPluginTy> Plugin;
   std::vector<ol_device_impl_t> Devices;
+  ol_platform_backend_t BackendType;
 };
 
 struct ol_queue_impl_t {
@@ -113,6 +115,16 @@ template <typename HandleT> ol_impl_result_t olDestroy(HandleT Handle) {
   return OL_SUCCESS;
 }
 
+// Maps a stringified PLUGIN_TARGET name to its backend enum. NOTE(review):
+// the AMD plugin target is believed to be spelled "amdgpu"; "amd" is kept.
+constexpr ol_platform_backend_t pluginNameToBackend(StringRef Name) {
+  if (Name == "amdgpu" || Name == "amd")
+    return OL_PLATFORM_BACKEND_AMDGPU;
+  if (Name == "cuda")
+    return OL_PLATFORM_BACKEND_CUDA;
+  return OL_PLATFORM_BACKEND_UNKNOWN;
+}
+
 // Every plugin exports this method to create an instance of the plugin type.
 #define PLUGIN_TARGET(Name) extern "C" GenericPluginTy *createPlugin_##Name();
 #include "Shared/Targets.def"
@@ -122,24 +134,12 @@ void initPlugins() {
 #define PLUGIN_TARGET(Name)                                                    \
   do {                                                                         \
     Platforms().emplace_back(ol_platform_impl_t{                               \
-        std::unique_ptr<GenericPluginTy>(createPlugin_##Name()), {}});         \
+        std::unique_ptr<GenericPluginTy>(createPlugin_##Name()),               \
+        {},                                                                    \
+        pluginNameToBackend(#Name)});                                          \
   } while (false);
 #include "Shared/Targets.def"
 
-  // Preemptively initialize all devices in the plugin so we can just return
-  // them from deviceGet
-  for (auto &Platform : Platforms()) {
-    auto Err = Platform.Plugin->init();
-    [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
-    for (auto DevNum = 0; DevNum < Platform.Plugin->number_of_devices();
-         DevNum++) {
-      if (Platform.Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) {
-        Platform.Devices.emplace_back(ol_device_impl_t{
-            DevNum, &Platform.Plugin->getDevice(DevNum), &Platform});
-      }
-    }
-  }
-
   offloadConfig().TracingEnabled = std::getenv("OFFLOAD_TRACE");
   offloadConfig().ValidationEnabled =
       !std::getenv("OFFLOAD_DISABLE_VALIDATION");
@@ -155,27 +155,6 @@ ol_impl_result_t olInit_impl() {
 }
 ol_impl_result_t olShutDown_impl() { return OL_SUCCESS; }
 
-ol_impl_result_t olGetPlatformCount_impl(uint32_t *NumPlatforms) {
-  *NumPlatforms = Platforms().size();
-  return OL_SUCCESS;
-}
-
-ol_impl_result_t olGetPlatform_impl(uint32_t NumEntries,
-                                    ol_platform_handle_t *PlatformsOut) {
-  if (NumEntries > Platforms().size()) {
-    return {OL_ERRC_INVALID_SIZE,
-            std::string{formatv("{0} platform(s) available but {1} requested.",
-                                Platforms().size(), NumEntries)}};
-  }
-
-  for (uint32_t PlatformIndex = 0; PlatformIndex < NumEntries;
-       PlatformIndex++) {
-    PlatformsOut[PlatformIndex] = &(Platforms())[PlatformIndex];
-  }
-
-  return OL_SUCCESS;
-}
-
 ol_impl_result_t olGetPlatformInfoImplDetail(ol_platform_handle_t Platform,
                                              ol_platform_info_t PropName,
                                              size_t PropSize, void *PropValue,
@@ -225,26 +204,6 @@ ol_impl_result_t olGetPlatformInfoSize_impl(ol_platform_handle_t Platform,
                                      PropSizeRet);
 }
 
-ol_impl_result_t olGetDeviceCount_impl(ol_platform_handle_t Platform,
-                                       uint32_t *pNumDevices) {
-  *pNumDevices = static_cast<uint32_t>(Platform->Devices.size());
-
-  return OL_SUCCESS;
-}
-
-ol_impl_result_t olGetDevice_impl(ol_platform_handle_t Platform,
-                                  uint32_t NumEntries,
-                                  ol_device_handle_t *Devices) {
-  if (NumEntries > Platform->Devices.size())
-    return OL_ERRC_INVALID_SIZE;
-
-  for (uint32_t DeviceIndex = 0; DeviceIndex < NumEntries; DeviceIndex++) {
-    Devices[DeviceIndex] = &(Platform->Devices[DeviceIndex]);
-  }
-
-  return OL_SUCCESS;
-}
-
 ol_impl_result_t olGetDeviceInfoImplDetail(ol_device_handle_t Device,
                                            ol_device_info_t PropName,
                                            size_t PropSize, void *PropValue,
@@ -305,6 +264,80 @@ ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
   return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet);
 }
 
+// Collects up to NumEntries devices accepted by both filters, initializing
+// plugins and devices on demand. Either output pointer may be null.
+ol_impl_result_t olGetFilteredDevicesImplDetail(
+    uint32_t NumEntries, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, ol_device_handle_t *FilteredDevices,
+    uint32_t *NumFilteredDevices) {
+  if (FilteredDevices && NumEntries == 0)
+    return OL_ERRC_INVALID_SIZE;
+  size_t DeviceIndex = 0;
+  bool Full = false;
+  for (auto &Platform : Platforms()) {
+    if (Full)
+      break;
+    if (!PlatformFilter(Platform.BackendType, Platform.Plugin->getName()))
+      continue;
+    // toString() also consumes the failed llvm::Error, as LLVM requires.
+    if (auto Err = Platform.Plugin->init())
+      return {OL_ERRC_UNKNOWN,
+              "Could not initialize plugin: " + toString(std::move(Err))};
+    // Handles point into Platform.Devices; reserve so it never reallocates.
+    const auto NumDevices = Platform.Plugin->number_of_devices();
+    Platform.Devices.reserve(NumDevices > 0 ? NumDevices : 0);
+    for (auto DevNum = 0; !Full && DevNum < NumDevices; DevNum++) {
+      ol_device_handle_t Device = nullptr;
+      if (Platform.Plugin->is_device_initialized(DevNum)) {
+        Device = &Platform.Devices[DevNum];
+      } else if (Platform.Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) {
+        Device = &Platform.Devices.emplace_back(ol_device_impl_t{
+            DevNum, &Platform.Plugin->getDevice(DevNum), &Platform});
+      } else {
+        return {OL_ERRC_UNKNOWN, "Could not initialize device."};
+      }
+      ol_device_type_t DeviceType;
+      olGetDeviceInfoImplDetail(Device, OL_DEVICE_INFO_TYPE,
+                                sizeof(DeviceType), &DeviceType, nullptr);
+      if (!DeviceFilter(DeviceType))
+        continue;
+      if (FilteredDevices)
+        FilteredDevices[DeviceIndex] = Device;
+      Full = (++DeviceIndex == NumEntries);
+    }
+  }
+  if (NumFilteredDevices)
+    *NumFilteredDevices = DeviceIndex;
+  return OL_SUCCESS;
+}
+
+ol_impl_result_t olGetFilteredDevices_impl(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, ol_device_handle_t *FilteredDevices) {
+  return olGetFilteredDevicesImplDetail(MaxNumDevices, PlatformFilter, // handles only: count output is null
+                                        DeviceFilter, FilteredDevices, nullptr);
+}
+
+ol_impl_result_t olGetFilteredDevicesCount_impl(
+    uint32_t MaxNumDevices, ol_platform_filter_cb_t PlatformFilter,
+    ol_device_filter_cb_t DeviceFilter, uint32_t *NumFilteredDevices) {
+  return olGetFilteredDevicesImplDetail( // count only: handle buffer is null
+      MaxNumDevices, PlatformFilter, DeviceFilter, nullptr, NumFilteredDevices);
+}
+
+ol_impl_result_t olGetDeviceCount_impl(uint32_t *pNumDevices) {
+  return olGetFilteredDevicesCount_impl( // accept all; UINT32_MAX = no cap
+      UINT32_MAX, [](ol_platform_backend_t, const char *) { return true; },
+      [](ol_device_type_t) { return true; }, pNumDevices);
+}
+
+ol_impl_result_t olGetDevices_impl(uint32_t NumEntries,
+                                   ol_device_handle_t *Devices) {
+  auto AnyPlatform = [](ol_platform_backend_t, const char *) { return true; };
+  auto AnyDevice = [](ol_device_type_t) { return true; };
+  return olGetFilteredDevices_impl(NumEntries, AnyPlatform, AnyDevice, Devices);
+}
+
 ol_impl_result_t olGetHostDevice_impl(ol_device_handle_t *Device) {
   *Device = HostDevice();
   return OL_SUCCESS;
diff --git a/offload/tools/offload-tblgen/APIGen.cpp b/offload/tools/offload-tblgen/APIGen.cpp
index 8cc5bd5e452fe..800c9cadfe38b 100644
--- a/offload/tools/offload-tblgen/APIGen.cpp
+++ b/offload/tools/offload-tblgen/APIGen.cpp
@@ -165,6 +165,19 @@ static void ProcessStruct(const StructRec &Struct, raw_ostream &OS) {
   OS << formatv("} {0};\n\n", Struct.getName());
 }
 
+// Emits a C function-pointer typedef with one commented parameter per line.
+static void ProcessFptrTypedef(const FptrTypedefRec &F, raw_ostream &OS) {
+  OS << CommentsHeader << formatv("/// @brief {0}\n", F.getDesc());
+  OS << formatv("typedef {0} (*{1})(", F.getReturn(), F.getName());
+  const char *Separator = ""; // becomes "," once the first parameter is out
+  for (const auto &Param : F.getParams()) {
+    OS << Separator << formatv("\n  // {0}\n  {1} {2}", Param.getDesc(),
+                               Param.getType(), Param.getName());
+    Separator = ",";
+  }
+  OS << ");\n";
+}
+
 static void ProcessFuncParamStruct(const FunctionRec &Func, raw_ostream &OS) {
   if (Func.getParams().size() == 0) {
     return;
@@ -220,6 +233,8 @@ void EmitOffloadAPI(const RecordKeeper &Records, raw_ostream &OS) {
       ProcessEnum(EnumRec{R}, OS);
     } else if (R->isSubClassOf("Struct")) {
       ProcessStruct(StructRec{R}, OS);
+    } else if (R->isSubClassOf("FptrTypedef")) {
+      ProcessFptrTypedef(FptrTypedefRec{R}, OS);
     }
   }
 
diff --git a/offload/tools/offload-tblgen/PrintGen.cpp b/offload/tools/offload-tblgen/PrintGen.cpp
index ca1b5e3e2bead..a964ff09d0f6e 100644
--- a/offload/tools/offload-tblgen/PrintGen.cpp
+++ b/offload/tools/offload-tblgen/PrintGen.cpp
@@ -139,6 +139,9 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const struct {0} *pa
           Param.getName(), TypeInfo->first, TypeInfo->second);
     } else if (Param.isPointerType() || Param.isHandleType()) {
       OS << formatv(TAB_1 "printPtr(os, *params->p{0});\n", Param.getName());
+    } else if (Param.isFptrType()) {
+      OS << formatv(TAB_1 "os << reinterpret_cast<void*>(*params->p{0});\n",
+                    Param.getName());
     } else {
       OS << formatv(TAB_1 "os << *params->p{0};\n", Param.getName());
     }
diff --git a/offload/tools/offload-tblgen/RecordTypes.hpp b/offload/tools/offload-tblgen/RecordTypes.hpp
index 9faf361f4dd76..686634ed778aa 100644
--- a/offload/tools/offload-tblgen/RecordTypes.hpp
+++ b/offload/tools/offload-tblgen/RecordTypes.hpp
@@ -155,6 +155,7 @@ class ParamRec {
   StringRef getType() const { return rec->getValueAsString("type"); }
   bool isPointerType() const { return getType().ends_with('*'); }
   bool isHandleType() const { return getType().ends_with("_handle_t"); }
+  bool isFptrType() const { return getType().ends_with("_cb_t"); }
   StringRef getDesc() const { return rec->getValueAsString("desc"); }
   bool isIn() const { return dyn_cast<BitInit>(flags->getBit(0))->getValue(); }
   bool isOut() const { return dyn_cast<BitInit>(flags->getBit(1))->getValue(); }
@@ -224,6 +225,23 @@ class FunctionRec {
   const Record *rec;
 };
 
+// Read-only wrapper around an `FptrTypedef` record and its parameter list.
+class FptrTypedefRec {
+public:
+  explicit FptrTypedefRec(const Record *rec) : rec(rec) {
+    // Wrap every def in the record's "params" list once, at construction.
+    for (const auto &ParamDef : rec->getValueAsListOfDefs("params"))
+      params.emplace_back(ParamDef);
+  }
+  StringRef getName() const { return rec->getValueAsString("name"); }
+  StringRef getDesc() const { return rec->getValueAsString("desc"); }
+  StringRef getReturn() const { return rec->getValueAsString("return"); }
+  const std::vector<ParamRec> &getParams() const { return params; }
+private:
+  std::vector<ParamRec> params;
+  const Record *rec;
+};
+
 } // namespace tblgen
 } // namespace offload
 } // namespace llvm
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index 4ffd07762a4bd..4658c7caca267 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -6,8 +6,6 @@ message(${OFFLOAD_TEST_DEVICE_CODE_PATH})
 
 add_libompt_unittest("offload.unittests"
     ${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatform.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatformCount.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatformInfo.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatformInfoSize.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDevice.cpp
diff --git a/offload/unittests/OffloadAPI/common/Environment.cpp b/offload/unittests/OffloadAPI/common/Environment.cpp
index 1eb0247e1b494..3a36e474efed3 100644
--- a/offload/unittests/OffloadAPI/common/Environment.cpp
+++ b/offload/unittests/OffloadAPI/common/Environment.cpp
@@ -27,8 +27,8 @@ static cl::opt<std::string>
     SelectedPlatform("platform", cl::desc("Only test the specified platform"),
                      cl::value_desc("platform"));
 
-std::ostream &operator<<(std::ostream &Out,
-                         const ol_platform_handle_t &Platform) {
+raw_ostream &operator<<(raw_ostream &Out,
+                        const ol_platform_handle_t &Platform) {
   size_t Size;
   olGetPlatformInfoSize(Platform, OL_PLATFORM_INFO_NAME, &Size);
   std::vector<char> Name(Size);
@@ -37,74 +37,69 @@ std::ostream &operator<<(std::ostream &Out,
   return Out;
 }
 
-std::ostream &operator<<(std::ostream &Out,
-                         const std::vector<ol_platform_handle_t> &Platforms) {
-  for (auto Platform : Platforms) {
-    Out << "\n  * \"" << Platform << "\"";
+void printPlatforms() {
+  SmallDenseSet<ol_platform_handle_t> Platforms;
+  uint32_t DeviceCount = 0;
+  olGetDeviceCount(&DeviceCount);
+  std::vector<ol_device_handle_t> Devices{DeviceCount};
+  olGetDevices(DeviceCount, Devices.data());
+  for (auto &Device : Devices) {
+    ol_platform_handle_t Platform;
+    olGetDeviceInfo(Device, OL_DEVICE_INFO_PLATFORM, sizeof(Platform),
+                    &Platform);
+    Platforms.insert(Platform);
   }
-  return Out;
-}
-
-const std::vector<ol_platform_handle_t> &TestEnvironment::getPlatforms() {
-  static std::vector<ol_platform_handle_t> Platforms{};
 
-  if (Platforms.empty()) {
-    uint32_t PlatformCount = 0;
-    olGetPlatformCount(&PlatformCount);
-    if (PlatformCount > 0) {
-      Platforms.resize(PlatformCount);
-      olGetPlatform(PlatformCount, Platforms.data());
-    }
+  for (const auto &Platform : Platforms) {
+    errs() << "  * " << Platform << "\n";
   }
-
-  return Platforms;
 }
 
-// Get a single platform, which may be selected by the user.
-ol_platform_handle_t TestEnvironment::getPlatform() {
-  static ol_platform_handle_t Platform = nullptr;
-  const auto &Platforms = getPlatforms();
-
-  if (!Platform) {
-    if (SelectedPlatform != "") {
-      for (const auto CandidatePlatform : Platforms) {
-        std::stringstream PlatformName;
-        PlatformName << CandidatePlatform;
-        if (SelectedPlatform == PlatformName.str()) {
-          Platform = CandidatePlatform;
-          return Platform;
-        }
+ol_device_handle_t TestEnvironment::getDevice() {
+  static ol_device_handle_t Device = nullptr;
+
+  if (!Device) {
+    uint32_t DeviceCount = 0;
+    auto PlatformFilter = [](ol_platform_backend_t Backend, const char *Name) {
+      if (SelectedPlatform != "") {
+        return SelectedPlatform == Name;
+      } else {
+        return Backend != OL_PLATFORM_BACKEND_UNKNOWN;
       }
-      std::cout << "No platform found with the name \"" << SelectedPlatform
-                << "\". Choose from:" << Platforms << "\n";
-      std::exit(1);
+    };
+    // Accept any device in the filtered platform
+    auto DeviceFilter = [](ol_device_type_t) { return true; };
+    olGetFilteredDevicesCount(128, PlatformFilter, DeviceFilter, &DeviceCount);
+    if (DeviceCount > 0) {
+      olGetFilteredDevices(1, PlatformFilter, DeviceFilter, &Device);
     } else {
-      // Pick a single platform. We prefer one that has available devices, but
-      // just pick the first initially in case none have any devices.
-      Platform = Platforms[0];
-      for (auto CandidatePlatform : Platforms) {
-        uint32_t NumDevices = 0;
-        if (olGetDeviceCount(CandidatePlatform, &NumDevices) == OL_SUCCESS) {
-          if (NumDevices > 0) {
-            Platform = CandidatePlatform;
-            break;
-          }
-        }
-      }
+      errs() << "No device found with the platform \"" << SelectedPlatform
+             << "\". Choose from:"
+             << "\n";
+      printPlatforms();
+      std::exit(1);
     }
   }
 
-  return Platform;
+  return Device;
+}
+
+ol_device_handle_t TestEnvironment::getHostDevice() {
+  ol_device_handle_t Host = nullptr; // returned as-is; result code is ignored
+  olGetHostDevice(&Host);
+  return Host;
+}
 
 // TODO: Allow overriding via cmd line arg
 const std::string DeviceBinsDirectory = DEVICE_CODE_PATH;
 
 bool TestEnvironment::loadDeviceBinary(
-    const std::string &BinaryName, ol_platform_handle_t Platform,
+    const std::string &BinaryName, ol_device_handle_t Device,
     std::unique_ptr<MemoryBuffer> &BinaryOut) {
 
   // Get the platform type
+  ol_platform_handle_t Platform;
+  olGetDeviceInfo(Device, OL_DEVICE_INFO_PLATFORM, sizeof(Platform), &Platform);
   ol_platform_backend_t Backend = OL_PLATFORM_BACKEND_UNKNOWN;
   olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND, sizeof(Backend),
                     &Backend);
diff --git a/offload/unittests/OffloadAPI/common/Environment.hpp b/offload/unittests/OffloadAPI/common/Environment.hpp
index dc5d4fce7d581..a0bf688b45515 100644
--- a/offload/unittests/OffloadAPI/common/Environment.hpp
+++ b/offload/unittests/OffloadAPI/common/Environment.hpp
@@ -13,9 +13,8 @@
 #include <gtest/gtest.h>
 
 namespace TestEnvironment {
-const std::vector<ol_platform_handle_t> &getPlatforms();
-ol_platform_handle_t getPlatform();
-bool loadDeviceBinary(const std::string &BinaryName,
-                      ol_platform_handle_t Platform,
+ol_device_handle_t getDevice();
+ol_device_handle_t getHostDevice();
+bool loadDeviceBinary(const std::string &BinaryName, ol_device_handle_t Device,
                       std::unique_ptr<llvm::MemoryBuffer> &BinaryOut);
 } // namespace TestEnvironment
diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp
index f712e8b2f6c8b..34922d0b722fc 100644
--- a/offload/unittests/OffloadAPI/common/Fixtures.hpp
+++ b/offload/unittests/OffloadAPI/common/Fixtures.hpp
@@ -46,29 +46,28 @@ struct OffloadTest : ::testing::Test {
   // No special behavior now, but just in case we need to override it in future
 };
 
-struct OffloadPlatformTest : OffloadTest {
+struct OffloadDeviceTest : OffloadTest {
   void SetUp() override {
     RETURN_ON_FATAL_FAILURE(OffloadTest::SetUp());
 
-    Platform = TestEnvironment::getPlatform();
-    ASSERT_NE(Platform, nullptr);
+    Device = TestEnvironment::getDevice();
+    if (Device == nullptr)
+      GTEST_SKIP() << "No available devices.";
   }
 
-  ol_platform_handle_t Platform;
+  ol_device_handle_t Device = nullptr;
 };
 
-struct OffloadDeviceTest : OffloadPlatformTest {
+struct OffloadPlatformTest : OffloadDeviceTest {
   void SetUp() override {
-    RETURN_ON_FATAL_FAILURE(OffloadPlatformTest::SetUp());
+    RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::SetUp());
 
-    uint32_t NumDevices;
-    ASSERT_SUCCESS(olGetDeviceCount(Platform, &NumDevices));
-    if (NumDevices == 0)
-      GTEST_SKIP() << "No available devices on this platform.";
-    ASSERT_SUCCESS(olGetDevice(Platform, 1, &Device));
+    ASSERT_SUCCESS(olGetDeviceInfo(Device, OL_DEVICE_INFO_PLATFORM,
+                                   sizeof(Platform), &Platform));
+    ASSERT_NE(Platform, nullptr);
   }
 
-  ol_device_handle_t Device = nullptr;
+  ol_platform_handle_t Platform = nullptr;
 };
 
 // Fixture for a generic program test. If you want a different program, use
@@ -76,7 +75,7 @@ struct OffloadDeviceTest : OffloadPlatformTest {
 struct OffloadProgramTest : OffloadDeviceTest {
   void SetUp() override {
     RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::SetUp());
-    ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Platform, DeviceBin));
+    ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin));
     ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
     ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
                                    DeviceBin->getBufferSize(), &Program));
diff --git a/offload/unittests/OffloadAPI/device/olGetDevice.cpp b/offload/unittests/OffloadAPI/device/olGetDevice.cpp
index c3ec88f8036ae..b9c736d8e516e 100644
--- a/offload/unittests/OffloadAPI/device/olGetDevice.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDevice.cpp
@@ -10,16 +10,17 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-using olGetDeviceTest = OffloadPlatformTest;
+// TODO: Rename
+using olGetDeviceTest = OffloadTest;
 
 TEST_F(olGetDeviceTest, Success) {
   uint32_t Count = 0;
-  ASSERT_SUCCESS(olGetDeviceCount(Platform, &Count));
+  ASSERT_SUCCESS(olGetDeviceCount(&Count));
   if (Count == 0)
-    GTEST_SKIP() << "No available devices on this platform.";
+    GTEST_SKIP() << "No available devices.";
 
   std::vector<ol_device_handle_t> Devices(Count);
-  ASSERT_SUCCESS(olGetDevice(Platform, Count, Devices.data()));
+  ASSERT_SUCCESS(olGetDevices(Count, Devices.data()));
   for (auto Device : Devices) {
     ASSERT_NE(nullptr, Device);
   }
@@ -27,12 +28,12 @@ TEST_F(olGetDeviceTest, Success) {
 
 TEST_F(olGetDeviceTest, SuccessSubsetOfDevices) {
   uint32_t Count;
-  ASSERT_SUCCESS(olGetDeviceCount(Platform, &Count));
+  ASSERT_SUCCESS(olGetDeviceCount(&Count));
   if (Count < 2)
-    GTEST_SKIP() << "Only one device is available on this platform.";
+    GTEST_SKIP() << "Only one device is available.";
 
   std::vector<ol_device_handle_t> Devices(Count - 1);
-  ASSERT_SUCCESS(olGetDevice(Platform, Count - 1, Devices.data()));
+  ASSERT_SUCCESS(olGetDevices(Count - 1, Devices.data()));
   for (auto Device : Devices) {
     ASSERT_NE(nullptr, Device);
   }
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp
index db813cb774cd4..db526663b051f 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp
@@ -10,19 +10,13 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-using olGetDeviceCountTest = OffloadPlatformTest;
+using olGetDeviceCountTest = OffloadTest;
 
 TEST_F(olGetDeviceCountTest, Success) {
   uint32_t Count = 0;
-  ASSERT_SUCCESS(olGetDeviceCount(Platform, &Count));
-}
-
-TEST_F(olGetDeviceCountTest, InvalidNullPlatform) {
-  uint32_t Count = 0;
-  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olGetDeviceCount(nullptr, &Count));
+  ASSERT_SUCCESS(olGetDeviceCount(&Count));
 }
 
 TEST_F(olGetDeviceCountTest, InvalidNullPointer) {
-  ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
-               olGetDeviceCount(Platform, nullptr));
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olGetDeviceCount(nullptr));
 }
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
index a4bfc0abb2440..f71f60a2c057f 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
@@ -37,7 +37,7 @@ TEST_P(olGetDeviceInfoTest, Success) {
   if (InfoType == OL_DEVICE_INFO_PLATFORM) {
     auto *ReturnedPlatform =
         reinterpret_cast<ol_platform_handle_t *>(InfoData.data());
-    ASSERT_EQ(Platform, *ReturnedPlatform);
+    ASSERT_NE(nullptr, *ReturnedPlatform);
   }
 }
 
diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
index 0f71fd6e7f3dc..cd0261a7549bc 100644
--- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
@@ -13,7 +13,7 @@
 struct olLaunchKernelTest : OffloadQueueTest {
   void SetUp() override {
     RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
-    ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Platform, DeviceBin));
+    ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin));
     ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
     ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
                                    DeviceBin->getBufferSize(), &Program));
diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp
deleted file mode 100644
index b663c623bf085..0000000000000
--- a/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//===------- Offload API tests - olGetPlatform -----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "../common/Fixtures.hpp"
-#include <OffloadAPI.h>
-#include <gtest/gtest.h>
-
-using olGetPlatformTest = OffloadTest;
-
-TEST_F(olGetPlatformTest, Success) {
-  uint32_t PlatformCount;
-  ASSERT_SUCCESS(olGetPlatformCount(&PlatformCount));
-  std::vector<ol_platform_handle_t> Platforms(PlatformCount);
-  ASSERT_SUCCESS(olGetPlatform(PlatformCount, Platforms.data()));
-}
-
-TEST_F(olGetPlatformTest, InvalidNumEntries) {
-  uint32_t PlatformCount;
-  ASSERT_SUCCESS(olGetPlatformCount(&PlatformCount));
-  std::vector<ol_platform_handle_t> Platforms(PlatformCount);
-  ASSERT_ERROR(OL_ERRC_INVALID_SIZE,
-               olGetPlatform(PlatformCount + 1, Platforms.data()));
-}
diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp
deleted file mode 100644
index 3ae00f553f97e..0000000000000
--- a/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===------- Offload API tests - olGetPlatformCount ------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "../common/Fixtures.hpp"
-#include <OffloadAPI.h>
-#include <gtest/gtest.h>
-
-using olGetPlatformCountTest = OffloadTest;
-
-TEST_F(olGetPlatformCountTest, Success) {
-  uint32_t PlatformCount;
-  ASSERT_SUCCESS(olGetPlatformCount(&PlatformCount));
-}
-
-TEST_F(olGetPlatformCountTest, InvalidNullPointer) {
-  ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olGetPlatformCount(nullptr));
-}
diff --git a/offload/unittests/OffloadAPI/program/olCreateProgram.cpp b/offload/unittests/OffloadAPI/program/olCreateProgram.cpp
index bef2a16c6e10e..c586c04596201 100644
--- a/offload/unittests/OffloadAPI/program/olCreateProgram.cpp
+++ b/offload/unittests/OffloadAPI/program/olCreateProgram.cpp
@@ -15,7 +15,7 @@ using olCreateProgramTest = OffloadDeviceTest;
 TEST_F(olCreateProgramTest, Success) {
 
   std::unique_ptr<llvm::MemoryBuffer> DeviceBin;
-  ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Platform, DeviceBin));
+  ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin));
   ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
 
   ol_program_handle_t Program;



More information about the llvm-commits mailing list