[llvm] [offload] Change olLaunchKernel to accept argument arrays (PR #173263)
Łukasz Plewa via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 22 06:24:39 PST 2025
https://github.com/lplewa created https://github.com/llvm/llvm-project/pull/173263
olLaunchKernel previously accepted kernel arguments as a single contiguous buffer. While this was sufficient for the CUDA and AMD plugins, Level Zero requires a separate pointer for each argument, and these pointers cannot be derived from a flat buffer without knowing the size of each individual argument.
This change updates the interface to accept an array of argument pointers and a corresponding array of argument sizes (const void ** + const int64_t *), together with the number of elements in those arrays (uint32_t).
Note: the offload tests still have to be updated to the new API.
>From af6a6f969f043eb58a1f5d9ed8d5c7a0ff7defe4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Plewa?= <lukasz.plewa at intel.com>
Date: Thu, 18 Dec 2025 14:28:10 +0100
Subject: [PATCH] [offload] Change olLaunchKernel to accept argument arrays
olLaunchKernel previously accepted kernel arguments as a single
contiguous buffer. While this was sufficient for CUDA and AMD plugins,
Level Zero requires separate argument ptrs that cannot be derived from a
flat buffer, without knowing the separate size of each argument.
This change updates the interface to accept an array of argument
pointers and a corresponding array of argument sizes
(void ** + int64_t *).
---
offload/liboffload/API/Kernel.td | 35 +++++++++++++------
offload/liboffload/src/OffloadImpl.cpp | 14 ++++----
.../common/src/PluginInterface.cpp | 7 ++--
.../level_zero/src/L0Kernel.cpp | 9 ++---
.../include/mathtest/DeviceContext.hpp | 5 +--
.../Conformance/lib/DeviceContext.cpp | 10 +++---
6 files changed, 49 insertions(+), 31 deletions(-)
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index 2f5692a19d712..270a361541489 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -26,18 +26,31 @@ def olLaunchKernel : Function {
"If a queue is not specified, kernel execution happens synchronously",
"ArgumentsData may be set to NULL (to indicate no parameters)"
];
- let params = [
- Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN_OPTIONAL>,
- Param<"ol_device_handle_t", "Device", "handle of the device to execute on", PARAM_IN>,
- Param<"ol_symbol_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
- Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN_OPTIONAL>,
- Param<"size_t", "ArgumentsSize", "size of the kernel argument struct", PARAM_IN>,
- Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>,
+ let params =
+ [Param<"ol_queue_handle_t", "Queue", "handle of the queue",
+ PARAM_IN_OPTIONAL>,
+ Param<"ol_device_handle_t", "Device",
+ "handle of the device to execute on", PARAM_IN>,
+ Param<"ol_symbol_handle_t", "Kernel", "handle of the kernel",
+ PARAM_IN>,
+ Param<"const void**", "ArgumentsData",
+ "pointer to the kernel arguments array", PARAM_IN_OPTIONAL>,
+ Param<"const int64_t*", "ArgumentsSize",
+ "pointer to the kernel arguments sizes array",
+ PARAM_IN_OPTIONAL>,
+ Param<"uint32_t", "ArgumentsNum",
+ "Number of the elements in the arguments arrays", PARAM_IN>,
+ Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs",
+ "pointer to the struct containing launch size parameters",
+ PARAM_IN>,
];
- let returns = [
- Return<"OL_ERRC_INVALID_ARGUMENT", ["`ArgumentsSize > 0 && ArgumentsData == NULL`"]>,
- Return<"OL_ERRC_INVALID_DEVICE", ["If Queue is non-null but does not belong to Device"]>,
- Return<"OL_ERRC_SYMBOL_KIND", ["The provided symbol is not a kernel"]>,
+ let returns =
+ [Return<"OL_ERRC_INVALID_ARGUMENT",
+ ["`ArgumentsNum > 0 && (ArgumentsData == NULL || ArgumentsSize "
+ "== NULL)`"]>,
+ Return<"OL_ERRC_INVALID_DEVICE",
+ ["If Queue is non-null but does not belong to Device"]>,
+ Return<"OL_ERRC_SYMBOL_KIND", ["The provided symbol is not a kernel"]>,
];
}
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 2eb7017bbc1a8..19d6a204069d4 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1073,8 +1073,8 @@ Error olCalculateOptimalOccupancy_impl(ol_device_handle_t Device,
}
Error olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
- ol_symbol_handle_t Kernel, const void *ArgumentsData,
- size_t ArgumentsSize,
+ ol_symbol_handle_t Kernel, const void **ArgumentsData,
+ const int64_t *ArgumentsSizes, uint32_t ArgumentsNum,
const ol_kernel_launch_size_args_t *LaunchSizeArgs) {
auto *DeviceImpl = Device->Device;
if (Queue && Device != Queue->Device) {
@@ -1098,12 +1098,10 @@ Error olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
LaunchArgs.ThreadLimit[2] = LaunchSizeArgs->GroupSize.z;
LaunchArgs.DynCGroupMem = LaunchSizeArgs->DynSharedMemory;
- KernelLaunchParamsTy Params;
- Params.Data = const_cast<void *>(ArgumentsData);
- Params.Size = ArgumentsSize;
- LaunchArgs.ArgPtrs = reinterpret_cast<void **>(&Params);
- // Don't do anything with pointer indirection; use arg data as-is
- LaunchArgs.Flags.IsCUDA = true;
+ LaunchArgs.ArgPtrs = const_cast<void **>(ArgumentsData);
+ LaunchArgs.NumArgs = ArgumentsNum;
+ LaunchArgs.ArgSizes = const_cast<int64_t *>(ArgumentsSizes);
+ std::vector<void *> ArgPtrs;
auto *KernelImpl = std::get<GenericKernelTy *>(Kernel->PluginImpl);
auto Err = KernelImpl->launch(*DeviceImpl, LaunchArgs.ArgPtrs, nullptr,
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index fc5fe7529e3e5..fa074cb4d5bd7 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -581,8 +581,11 @@ KernelLaunchParamsTy GenericKernelTy::prepareArgs(
}
for (uint32_t I = KLEOffset; I < NumArgs; ++I) {
- Args[I] =
- (void *)((intptr_t)ArgPtrs[I - KLEOffset] + ArgOffsets[I - KLEOffset]);
+ if (ArgOffsets == nullptr)
+ Args[I] = ArgPtrs[I - KLEOffset];
+ else
+ Args[I] = (void *)((intptr_t)ArgPtrs[I - KLEOffset] +
+ ArgOffsets[I - KLEOffset]);
Ptrs[I] = &Args[I];
}
return KernelLaunchParamsTy{sizeof(void *) * NumArgs, &Args[0], &Ptrs[0]};
diff --git a/offload/plugins-nextgen/level_zero/src/L0Kernel.cpp b/offload/plugins-nextgen/level_zero/src/L0Kernel.cpp
index e6d7bba305fd8..17657c6d35d00 100644
--- a/offload/plugins-nextgen/level_zero/src/L0Kernel.cpp
+++ b/offload/plugins-nextgen/level_zero/src/L0Kernel.cpp
@@ -449,13 +449,14 @@ Error L0KernelTy::launchImpl(GenericDeviceTy &GenericDevice,
for (int32_t I = 0; I < NumArgs; I++) {
// Scope code to ease integration with downstream custom code.
{
- void *Arg = (static_cast<void **>(LaunchParams.Data))[I];
- CALL_ZE_RET_ERROR(zeKernelSetArgumentValue, zeKernel, I, sizeof(Arg),
- Arg == nullptr ? nullptr : &Arg);
+ auto arg = KernelArgs.ArgPtrs[I];
+ CALL_ZE_RET_ERROR(zeKernelSetArgumentValue, zeKernel, I,
+ KernelArgs.ArgSizes[I], arg);
+
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
"Kernel Pointer argument %" PRId32 " (value: " DPxMOD
") was set successfully for device %s.\n",
- I, DPxPTR(Arg), IdStr);
+ I, DPxPTR(arg), IdStr);
}
}
diff --git a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
index 5c31fc3da53cd..2d34bce33f986 100644
--- a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
+++ b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
@@ -126,8 +126,9 @@ class DeviceContext {
llvm::StringRef KernelName) const noexcept;
void launchKernelImpl(ol_symbol_handle_t KernelHandle, uint32_t NumGroups,
- uint32_t GroupSize, const void *KernelArgs,
- std::size_t KernelArgsSize) const noexcept;
+ uint32_t GroupSize, const void **KernelArgs,
+ const int64_t *KernelArgsSizes,
+ std::size_t KernelArgsNum) const noexcept;
std::size_t GlobalDeviceId;
ol_device_handle_t DeviceHandle;
diff --git a/offload/unittests/Conformance/lib/DeviceContext.cpp b/offload/unittests/Conformance/lib/DeviceContext.cpp
index 6c3425f1e17c2..4a6a491f88084 100644
--- a/offload/unittests/Conformance/lib/DeviceContext.cpp
+++ b/offload/unittests/Conformance/lib/DeviceContext.cpp
@@ -286,9 +286,11 @@ DeviceContext::getKernelHandle(ol_program_handle_t ProgramHandle,
return Handle;
}
-void DeviceContext::launchKernelImpl(
- ol_symbol_handle_t KernelHandle, uint32_t NumGroups, uint32_t GroupSize,
- const void *KernelArgs, std::size_t KernelArgsSize) const noexcept {
+void DeviceContext::launchKernelImpl(ol_symbol_handle_t KernelHandle,
+ uint32_t NumGroups, uint32_t GroupSize,
+ const void **KernelArgs,
+ const int64_t *KernelArgsSizes,
+ std::size_t KernelArgsNum) const noexcept {
ol_kernel_launch_size_args_t LaunchSizeArgs;
LaunchSizeArgs.Dimensions = 1;
LaunchSizeArgs.NumGroups = {NumGroups, 1, 1};
@@ -296,7 +298,7 @@ void DeviceContext::launchKernelImpl(
LaunchSizeArgs.DynSharedMemory = 0;
OL_CHECK(olLaunchKernel(nullptr, DeviceHandle, KernelHandle, KernelArgs,
- KernelArgsSize, &LaunchSizeArgs));
+ KernelArgsSizes, KernelArgsNum, &LaunchSizeArgs));
}
[[nodiscard]] llvm::StringRef DeviceContext::getName() const noexcept {
More information about the llvm-commits
mailing list