[llvm] [Offload] Allow setting null arguments in olLaunchKernel (PR #141958)

Fri Jun 6 05:04:16 PDT 2025

https://github.com/RossBrunton updated https://github.com/llvm/llvm-project/pull/141958

>From d180dfb9ec23b7573d8526e81c2c37c2de7a3b95 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 29 May 2025 15:56:03 +0100
Subject: [PATCH 1/2] [Offload] Allow setting null arguments in olLaunchKernel

---
 offload/liboffload/API/Kernel.td              |  6 +++--
 .../OffloadAPI/device_code/CMakeLists.txt     |  1 +
 .../unittests/OffloadAPI/device_code/noargs.c |  3 +++
 .../OffloadAPI/kernel/olLaunchKernel.cpp      | 27 ++++++++++++++++---
 4 files changed, 31 insertions(+), 6 deletions(-)
 create mode 100644 offload/unittests/OffloadAPI/device_code/noargs.c

diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index 247f9c1bf5b6a..45e3d8112791c 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -43,19 +43,21 @@ def : Function {
     let name = "olLaunchKernel";
     let desc = "Enqueue a kernel launch with the specified size and parameters.";
     let details = [
-        "If a queue is not specified, kernel execution happens synchronously"
+        "If a queue is not specified, kernel execution happens synchronously",
+        "ArgumentsData may be set to NULL (to indicate no parameters)"
     ];
     let params = [
         Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN_OPTIONAL>,
         Param<"ol_device_handle_t", "Device", "handle of the device to execute on", PARAM_IN>,
         Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
-        Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN>,
+        Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN_OPTIONAL>,
         Param<"size_t", "ArgumentsSize", "size of the kernel argument struct", PARAM_IN>,
         Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>,
         Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
     ];
     let returns = [
         Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>,
+        Return<"OL_ERRC_INVALID_ARGUMENT", ["`ArgumentsSize > 0 && ArgumentsData == NULL`"]>,
         Return<"OL_ERRC_INVALID_DEVICE", ["If Queue is non-null but does not belong to Device"]>,
     ];
 }
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index 5814943e4aaa9..91a9ca24786a3 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -61,6 +61,7 @@ endif()
 
 add_offload_test_device_code(foo.c foo)
 add_offload_test_device_code(bar.c bar)
+add_offload_test_device_code(noargs.c noargs)
 
 add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
 
diff --git a/offload/unittests/OffloadAPI/device_code/noargs.c b/offload/unittests/OffloadAPI/device_code/noargs.c
new file mode 100644
index 0000000000000..36e609aa26a09
--- /dev/null
+++ b/offload/unittests/OffloadAPI/device_code/noargs.c
@@ -0,0 +1,3 @@
+#include <gpuintrin.h>
+
+__gpu_kernel void noargs() { (void)0; }
diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
index e17d094cfa612..d466799c1acaa 100644
--- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
@@ -10,14 +10,14 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-struct olLaunchKernelTest : OffloadQueueTest {
-  void SetUp() override {
+struct LaunchKernelTestBase : OffloadQueueTest {
+  void SetUpKernel(const char *kernel) {
     RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
-    ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin));
+    ASSERT_TRUE(TestEnvironment::loadDeviceBinary(kernel, Device, DeviceBin));
     ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
     ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
                                    DeviceBin->getBufferSize(), &Program));
-    ASSERT_SUCCESS(olGetKernel(Program, "foo", &Kernel));
+    ASSERT_SUCCESS(olGetKernel(Program, kernel, &Kernel));
     LaunchArgs.Dimensions = 1;
     LaunchArgs.GroupSizeX = 64;
     LaunchArgs.GroupSizeY = 1;
@@ -43,8 +43,20 @@ struct olLaunchKernelTest : OffloadQueueTest {
   ol_kernel_launch_size_args_t LaunchArgs{};
 };
 
+struct olLaunchKernelTest : LaunchKernelTestBase {
+  void SetUp() override {
+    RETURN_ON_FATAL_FAILURE(LaunchKernelTestBase::SetUpKernel("foo"));
+  }
+};
 OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernelTest);
 
+struct olLaunchKernelNoArgsTest : LaunchKernelTestBase {
+  void SetUp() override {
+    RETURN_ON_FATAL_FAILURE(LaunchKernelTestBase::SetUpKernel("noargs"));
+  }
+};
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernelNoArgsTest);
+
 TEST_P(olLaunchKernelTest, Success) {
   void *Mem;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
@@ -66,6 +78,13 @@ TEST_P(olLaunchKernelTest, Success) {
   ASSERT_SUCCESS(olMemFree(Mem));
 }
 
+TEST_P(olLaunchKernelNoArgsTest, Success) {
+  ASSERT_SUCCESS(
+      olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs, nullptr));
+
+  ASSERT_SUCCESS(olWaitQueue(Queue));
+}
+
 TEST_P(olLaunchKernelTest, SuccessSynchronous) {
   void *Mem;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,

>From 70ba0e6f49c2503f22eac0979a2bc83df36f01f7 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Fri, 30 May 2025 10:14:16 +0100
Subject: [PATCH 2/2] Set opts for noargs.c

---
 offload/unittests/OffloadAPI/device_code/CMakeLists.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index 91a9ca24786a3..c2e4d0cb24e6e 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -7,6 +7,7 @@ macro(add_offload_test_device_code test_filename test_name)
         add_custom_command(OUTPUT ${BIN_PATH}
             COMMAND
             ${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
+            ${ARGN}
             -march=${LIBOMPTARGET_DEP_CUDA_ARCH}
             --cuda-path=${CUDA_ROOT}
             ${SRC_PATH} -o ${BIN_PATH}
@@ -21,6 +22,7 @@ macro(add_offload_test_device_code test_filename test_name)
         add_custom_command(OUTPUT ${BIN_PATH}
             COMMAND
             ${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
+            ${ARGN}
             -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
             ${SRC_PATH} -o ${BIN_PATH}
             DEPENDS ${SRC_PATH}
@@ -61,7 +63,9 @@ endif()
 
 add_offload_test_device_code(foo.c foo)
 add_offload_test_device_code(bar.c bar)
-add_offload_test_device_code(noargs.c noargs)
+# By default, amdhsa will add a number of "hidden" arguments to the kernel defintion
+# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload
+add_offload_test_device_code(noargs.c noargs -O3)
 
 add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})