[llvm] [Offload] Add olGetKernelMaxGroupSize (PR #142950)
Ross Brunton via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 19 03:50:52 PDT 2025
https://github.com/RossBrunton updated https://github.com/llvm/llvm-project/pull/142950
>From ec7366c762c2bfbf28b02f1efbd1ae572dddf133 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 5 Jun 2025 12:21:52 +0100
Subject: [PATCH 1/3] [Offload] Add olKernelMaxGroupSize
This is equivalent to `cuOccupancyMaxPotentialBlockSize`. It is currently
only implemented on CUDA; AMDGPU and Host return the legal-but-suboptimal
value of `1`.
Co-Authored-By: Callum Fare <callum at codeplay.com>
---
offload/liboffload/API/Kernel.td | 17 +++++++-
offload/liboffload/src/OffloadImpl.cpp | 17 ++++++++
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 8 ++++
.../common/include/PluginInterface.h | 3 ++
.../cuda/dynamic_cuda/cuda.cpp | 1 +
.../plugins-nextgen/cuda/dynamic_cuda/cuda.h | 3 ++
offload/plugins-nextgen/cuda/src/rtl.cpp | 14 ++++++
offload/plugins-nextgen/host/src/rtl.cpp | 7 +++
offload/unittests/OffloadAPI/CMakeLists.txt | 1 +
.../kernel/olGetKernelMaxGroupSize.cpp | 43 +++++++++++++++++++
10 files changed, 113 insertions(+), 1 deletion(-)
create mode 100644 offload/unittests/OffloadAPI/kernel/olGetKernelMaxGroupSize.cpp
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index 502fb36467dba..45b9c9b9f6c18 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains Offload API definitions related to launching kernels
+// This file contains Offload API definitions related to kernels
//
//===----------------------------------------------------------------------===//
@@ -42,3 +42,18 @@ def : Function {
Return<"OL_ERRC_SYMBOL_KIND", ["The provided symbol is not a kernel"]>,
];
}
+
+def : Function {
+ let name = "olGetKernelMaxGroupSize";
+ let desc = "Get the maximum block size needed to achieve maximum occupancy.";
+ let details = [];
+ let params = [
+ Param<"ol_device_handle_t", "Device", "device intended to run the kernel", PARAM_IN>,
+ Param<"ol_symbol_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
+ Param<"size_t", "SharedMemory", "dynamic shared memory required", PARAM_IN>,
+ Param<"size_t*", "GroupSize", "maximum block size", PARAM_OUT>
+ ];
+ let returns = [
+ Return<"OL_ERRC_SYMBOL_KIND", ["The provided symbol is not a kernel"]>,
+ ];
+}
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 1c9dfc69d445a..3d7dfb184b9de 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -696,6 +696,23 @@ Error olDestroyProgram_impl(ol_program_handle_t Program) {
return olDestroy(Program);
}
+Error olGetKernelMaxGroupSize_impl(ol_device_handle_t Device,
+ ol_symbol_handle_t Kernel,
+ size_t DynamicMemSize, size_t *GroupSize) {
+ if (Kernel->Kind != OL_SYMBOL_KIND_KERNEL)
+ return createOffloadError(ErrorCode::SYMBOL_KIND,
+ "provided symbol is not a kernel");
+ auto *KernelImpl = std::get<GenericKernelTy *>(Kernel->PluginImpl);
+
+ auto Res = KernelImpl->maxGroupSize(*Device->Device, DynamicMemSize);
+ if (auto Err = Res.takeError())
+ return Err;
+
+ *GroupSize = *Res;
+
+ return Error::success();
+}
+
Error olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
ol_symbol_handle_t Kernel, const void *ArgumentsData,
size_t ArgumentsSize,
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 83280fe0a49c9..e7184dbe7f91c 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -570,6 +570,14 @@ struct AMDGPUKernelTy : public GenericKernelTy {
KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const override;
+ /// Return maximum block size for maximum occupancy
+ ///
+ /// TODO: This needs to be implemented for amdgpu
+ Expected<size_t> maxGroupSize(GenericDeviceTy &GenericDevice,
+ size_t DynamicMemSize) const override {
+ return 1;
+ }
+
/// Print more elaborate kernel launch info for AMDGPU
Error printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
KernelArgsTy &KernelArgs, uint32_t NumThreads[3],
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index a448721755a6f..cea84bf94a36e 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -388,6 +388,9 @@ struct GenericKernelTy {
KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const = 0;
+ virtual Expected<size_t> maxGroupSize(GenericDeviceTy &GenericDevice,
+ size_t DynamicMemSize) const = 0;
+
/// Get the kernel name.
const char *getName() const { return Name.c_str(); }
diff --git a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp
index 361a781e8f9b6..c003d0b2f9451 100644
--- a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp
+++ b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp
@@ -72,6 +72,7 @@ DLWRAP(cuDevicePrimaryCtxGetState, 3)
DLWRAP(cuDevicePrimaryCtxSetFlags, 2)
DLWRAP(cuDevicePrimaryCtxRetain, 2)
DLWRAP(cuModuleLoadDataEx, 5)
+DLWRAP(cuOccupancyMaxPotentialBlockSize, 6)
DLWRAP(cuDeviceCanAccessPeer, 3)
DLWRAP(cuCtxEnablePeerAccess, 2)
diff --git a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h
index b6c022c8e7e8b..5f1c44364c143 100644
--- a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h
+++ b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h
@@ -290,6 +290,7 @@ static inline void *CU_LAUNCH_PARAM_BUFFER_POINTER = (void *)0x01;
static inline void *CU_LAUNCH_PARAM_BUFFER_SIZE = (void *)0x02;
typedef void (*CUstreamCallback)(CUstream, CUresult, void *);
+typedef size_t (*CUoccupancyB2DSize)(int);
CUresult cuCtxGetDevice(CUdevice *);
CUresult cuDeviceGet(CUdevice *, int);
@@ -372,5 +373,7 @@ CUresult cuMemSetAccess(CUdeviceptr ptr, size_t size,
CUresult cuMemGetAllocationGranularity(size_t *granularity,
const CUmemAllocationProp *prop,
CUmemAllocationGranularity_flags option);
+CUresult cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction,
+ CUoccupancyB2DSize, size_t, int);
#endif
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index a99357a3adeaa..cf5c1008b2eea 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -157,6 +157,20 @@ struct CUDAKernelTy : public GenericKernelTy {
KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const override;
+ /// Return maximum block size for maximum occupancy
+ Expected<size_t> maxGroupSize(GenericDeviceTy &,
+ size_t DynamicMemSize) const override {
+ int minGridSize;
+ int maxBlockSize;
+ auto Res = cuOccupancyMaxPotentialBlockSize(
+ &minGridSize, &maxBlockSize, Func, NULL, DynamicMemSize, INT_MAX);
+ if (auto Err = Plugin::check(
+ Res, "error in cuOccupancyMaxPotentialBlockSize: %s")) {
+ return Err;
+ }
+ return maxBlockSize;
+ }
+
private:
/// The CUDA kernel function to execute.
CUfunction Func;
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 25443fd1ac0b3..bb1aa9831c5a1 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -114,6 +114,13 @@ struct GenELF64KernelTy : public GenericKernelTy {
return Plugin::success();
}
+ /// Return maximum block size for maximum occupancy
+ Expected<size_t> maxGroupSize(GenericDeviceTy &Device,
+ size_t DynamicMemSize) const override {
+ // TODO
+ return 1;
+ }
+
private:
/// The kernel function to execute.
void (*Func)(void);
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index b25db7022e9d7..5bed1bf7d657b 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -20,6 +20,7 @@ add_offload_unittest("init"
target_compile_definitions("init.unittests" PRIVATE DISABLE_WRAPPER)
add_offload_unittest("kernel"
+ kernel/olGetKernelMaxGroupSize.cpp
kernel/olLaunchKernel.cpp)
add_offload_unittest("memory"
diff --git a/offload/unittests/OffloadAPI/kernel/olGetKernelMaxGroupSize.cpp b/offload/unittests/OffloadAPI/kernel/olGetKernelMaxGroupSize.cpp
new file mode 100644
index 0000000000000..7923b10ea3030
--- /dev/null
+++ b/offload/unittests/OffloadAPI/kernel/olGetKernelMaxGroupSize.cpp
@@ -0,0 +1,43 @@
+//===------- Offload API tests - olGetKernelMaxGroupSize ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olKernelGetMaxGroupSizeTest = OffloadKernelTest;
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olKernelGetMaxGroupSizeTest);
+
+TEST_P(olKernelGetMaxGroupSizeTest, Success) {
+ size_t Size{0};
+ ASSERT_SUCCESS(olGetKernelMaxGroupSize(Device, Kernel, 0, &Size));
+ ASSERT_GT(Size, 0u);
+}
+
+TEST_P(olKernelGetMaxGroupSizeTest, SuccessMem) {
+ size_t Size{0};
+ ASSERT_SUCCESS(olGetKernelMaxGroupSize(Device, Kernel, 1024, &Size));
+ ASSERT_GT(Size, 0u);
+}
+
+TEST_P(olKernelGetMaxGroupSizeTest, NullKernel) {
+ size_t Size;
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
+ olGetKernelMaxGroupSize(Device, nullptr, 0, &Size));
+}
+
+TEST_P(olKernelGetMaxGroupSizeTest, NullDevice) {
+ size_t Size;
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
+ olGetKernelMaxGroupSize(nullptr, Kernel, 0, &Size));
+}
+
+TEST_P(olKernelGetMaxGroupSizeTest, NullOutput) {
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
+ olGetKernelMaxGroupSize(Device, Kernel, 0, nullptr));
+}
>From ae21468ffc2ad03597dea7e1dec026e28523d707 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Mon, 30 Jun 2025 16:22:33 +0100
Subject: [PATCH 2/3] Use uint64_t rather than size_t
---
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 4 ++--
offload/plugins-nextgen/common/include/PluginInterface.h | 4 ++--
offload/plugins-nextgen/cuda/src/rtl.cpp | 4 ++--
offload/plugins-nextgen/host/src/rtl.cpp | 4 ++--
4 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index e7184dbe7f91c..1c5e708820ea3 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -573,8 +573,8 @@ struct AMDGPUKernelTy : public GenericKernelTy {
/// Return maximum block size for maximum occupancy
///
/// TODO: This needs to be implemented for amdgpu
- Expected<size_t> maxGroupSize(GenericDeviceTy &GenericDevice,
- size_t DynamicMemSize) const override {
+ Expected<uint64_t> maxGroupSize(GenericDeviceTy &GenericDevice,
+ uint64_t DynamicMemSize) const override {
return 1;
}
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index cea84bf94a36e..8637e4c1f12c2 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -388,8 +388,8 @@ struct GenericKernelTy {
KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const = 0;
- virtual Expected<size_t> maxGroupSize(GenericDeviceTy &GenericDevice,
- size_t DynamicMemSize) const = 0;
+ virtual Expected<uint64_t> maxGroupSize(GenericDeviceTy &GenericDevice,
+ uint64_t DynamicMemSize) const = 0;
/// Get the kernel name.
const char *getName() const { return Name.c_str(); }
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index cf5c1008b2eea..3156060504acb 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -158,8 +158,8 @@ struct CUDAKernelTy : public GenericKernelTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) const override;
/// Return maximum block size for maximum occupancy
- Expected<size_t> maxGroupSize(GenericDeviceTy &,
- size_t DynamicMemSize) const override {
+ Expected<uint64_t> maxGroupSize(GenericDeviceTy &,
+ uint64_t DynamicMemSize) const override {
int minGridSize;
int maxBlockSize;
auto Res = cuOccupancyMaxPotentialBlockSize(
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index bb1aa9831c5a1..16f31a1c4bdf5 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -115,8 +115,8 @@ struct GenELF64KernelTy : public GenericKernelTy {
}
/// Return maximum block size for maximum occupancy
- Expected<size_t> maxGroupSize(GenericDeviceTy &Device,
- size_t DynamicMemSize) const override {
+ Expected<uint64_t> maxGroupSize(GenericDeviceTy &Device,
+ uint64_t DynamicMemSize) const override {
// TODO
return 1;
}
>From be8379c8eb35991ed087c46aede7a319a93771ad Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Tue, 19 Aug 2025 11:50:29 +0100
Subject: [PATCH 3/3] Rename function and make optional
---
offload/liboffload/API/Kernel.td | 9 ++--
offload/liboffload/src/OffloadImpl.cpp | 2 +-
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 4 +-
offload/plugins-nextgen/host/src/rtl.cpp | 5 ++-
offload/unittests/OffloadAPI/CMakeLists.txt | 2 +-
.../unittests/OffloadAPI/common/Fixtures.hpp | 14 ++++++
.../kernel/olCalculateMaxOccupancy.cpp | 43 +++++++++++++++++++
.../kernel/olGetKernelMaxGroupSize.cpp | 43 -------------------
8 files changed, 70 insertions(+), 52 deletions(-)
create mode 100644 offload/unittests/OffloadAPI/kernel/olCalculateMaxOccupancy.cpp
delete mode 100644 offload/unittests/OffloadAPI/kernel/olGetKernelMaxGroupSize.cpp
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index 45b9c9b9f6c18..14c3cc62ff1b2 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -44,16 +44,17 @@ def : Function {
}
def : Function {
- let name = "olGetKernelMaxGroupSize";
- let desc = "Get the maximum block size needed to achieve maximum occupancy.";
+ let name = "olCalculateMaxOccupancy";
+ let desc = "Given dynamic memory size, query the device for a workgroup size that will result in optimal occupancy.";
let details = [];
let params = [
Param<"ol_device_handle_t", "Device", "device intended to run the kernel", PARAM_IN>,
Param<"ol_symbol_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
- Param<"size_t", "SharedMemory", "dynamic shared memory required", PARAM_IN>,
- Param<"size_t*", "GroupSize", "maximum block size", PARAM_OUT>
+ Param<"size_t", "SharedMemory", "dynamic shared memory required per work item in bytes", PARAM_IN>,
+ Param<"size_t*", "GroupSize", "optimal block size", PARAM_OUT>
];
let returns = [
Return<"OL_ERRC_SYMBOL_KIND", ["The provided symbol is not a kernel"]>,
+ Return<"OL_ERRC_UNSUPPORTED", ["The backend cannot provide this information"]>,
];
}
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 3d7dfb184b9de..adf7b97da5a1e 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -696,7 +696,7 @@ Error olDestroyProgram_impl(ol_program_handle_t Program) {
return olDestroy(Program);
}
-Error olGetKernelMaxGroupSize_impl(ol_device_handle_t Device,
+Error olCalculateMaxOccupancy_impl(ol_device_handle_t Device,
ol_symbol_handle_t Kernel,
size_t DynamicMemSize, size_t *GroupSize) {
if (Kernel->Kind != OL_SYMBOL_KIND_KERNEL)
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 1c5e708820ea3..2dd1b715f24ca 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -575,7 +575,9 @@ struct AMDGPUKernelTy : public GenericKernelTy {
/// TODO: This needs to be implemented for amdgpu
Expected<uint64_t> maxGroupSize(GenericDeviceTy &GenericDevice,
uint64_t DynamicMemSize) const override {
- return 1;
+ return Plugin::error(
+ ErrorCode::UNSUPPORTED,
+ "occupancy calculations for AMDGPU are not yet implemented");
}
/// Print more elaborate kernel launch info for AMDGPU
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 16f31a1c4bdf5..0a2bb5ae0c771 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -117,8 +117,9 @@ struct GenELF64KernelTy : public GenericKernelTy {
/// Return maximum block size for maximum occupancy
Expected<uint64_t> maxGroupSize(GenericDeviceTy &Device,
uint64_t DynamicMemSize) const override {
- // TODO
- return 1;
+ return Plugin::error(
+ ErrorCode::UNSUPPORTED,
+ "occupancy calculations are not implemented for the host device");
}
private:
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index 5bed1bf7d657b..043ce224a64ef 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -20,7 +20,7 @@ add_offload_unittest("init"
target_compile_definitions("init.unittests" PRIVATE DISABLE_WRAPPER)
add_offload_unittest("kernel"
- kernel/olGetKernelMaxGroupSize.cpp
+ kernel/olCalculateMaxOccupancy.cpp
kernel/olLaunchKernel.cpp)
add_offload_unittest("memory"
diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp
index 43240fa3c4a08..fe7198a9c283f 100644
--- a/offload/unittests/OffloadAPI/common/Fixtures.hpp
+++ b/offload/unittests/OffloadAPI/common/Fixtures.hpp
@@ -26,6 +26,20 @@
} while (0)
#endif
+#ifndef ASSERT_SUCCESS_OR_UNSUPPORTED
+#define ASSERT_SUCCESS_OR_UNSUPPORTED(ACTUAL) \
+ do { \
+ ol_result_t Res = ACTUAL; \
+ if (Res && Res->Code == OL_ERRC_UNSUPPORTED) { \
+ GTEST_SKIP() << #ACTUAL " returned unsupported; skipping test"; \
+ return; \
+ } else if (Res && Res->Code != OL_ERRC_SUCCESS) { \
+ GTEST_FAIL() << #ACTUAL " returned " << Res->Code << ": " \
+ << Res->Details; \
+ } \
+ } while (0)
+#endif
+
// TODO: rework this so the EXPECTED/ACTUAL results are readable
#ifndef ASSERT_ERROR
#define ASSERT_ERROR(EXPECTED, ACTUAL) \
diff --git a/offload/unittests/OffloadAPI/kernel/olCalculateMaxOccupancy.cpp b/offload/unittests/OffloadAPI/kernel/olCalculateMaxOccupancy.cpp
new file mode 100644
index 0000000000000..8179989821ee3
--- /dev/null
+++ b/offload/unittests/OffloadAPI/kernel/olCalculateMaxOccupancy.cpp
@@ -0,0 +1,43 @@
+//===------- Offload API tests - olCalculateMaxOccupancy ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olCalculateMaxOccupancyTest = OffloadKernelTest;
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olCalculateMaxOccupancyTest);
+
+TEST_P(olCalculateMaxOccupancyTest, Success) {
+ size_t Size{0};
+ ASSERT_SUCCESS_OR_UNSUPPORTED(olCalculateMaxOccupancy(Device, Kernel, 0, &Size));
+ ASSERT_GT(Size, 0u);
+}
+
+TEST_P(olCalculateMaxOccupancyTest, SuccessMem) {
+ size_t Size{0};
+ ASSERT_SUCCESS_OR_UNSUPPORTED(olCalculateMaxOccupancy(Device, Kernel, 1024, &Size));
+ ASSERT_GT(Size, 0u);
+}
+
+TEST_P(olCalculateMaxOccupancyTest, NullKernel) {
+ size_t Size;
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
+ olCalculateMaxOccupancy(Device, nullptr, 0, &Size));
+}
+
+TEST_P(olCalculateMaxOccupancyTest, NullDevice) {
+ size_t Size;
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
+ olCalculateMaxOccupancy(nullptr, Kernel, 0, &Size));
+}
+
+TEST_P(olCalculateMaxOccupancyTest, NullOutput) {
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
+ olCalculateMaxOccupancy(Device, Kernel, 0, nullptr));
+}
diff --git a/offload/unittests/OffloadAPI/kernel/olGetKernelMaxGroupSize.cpp b/offload/unittests/OffloadAPI/kernel/olGetKernelMaxGroupSize.cpp
deleted file mode 100644
index 7923b10ea3030..0000000000000
--- a/offload/unittests/OffloadAPI/kernel/olGetKernelMaxGroupSize.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===------- Offload API tests - olGetKernelMaxGroupSize ------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "../common/Fixtures.hpp"
-#include <OffloadAPI.h>
-#include <gtest/gtest.h>
-
-using olKernelGetMaxGroupSizeTest = OffloadKernelTest;
-OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olKernelGetMaxGroupSizeTest);
-
-TEST_P(olKernelGetMaxGroupSizeTest, Success) {
- size_t Size{0};
- ASSERT_SUCCESS(olGetKernelMaxGroupSize(Device, Kernel, 0, &Size));
- ASSERT_GT(Size, 0u);
-}
-
-TEST_P(olKernelGetMaxGroupSizeTest, SuccessMem) {
- size_t Size{0};
- ASSERT_SUCCESS(olGetKernelMaxGroupSize(Device, Kernel, 1024, &Size));
- ASSERT_GT(Size, 0u);
-}
-
-TEST_P(olKernelGetMaxGroupSizeTest, NullKernel) {
- size_t Size;
- ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
- olGetKernelMaxGroupSize(Device, nullptr, 0, &Size));
-}
-
-TEST_P(olKernelGetMaxGroupSizeTest, NullDevice) {
- size_t Size;
- ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
- olGetKernelMaxGroupSize(nullptr, Kernel, 0, &Size));
-}
-
-TEST_P(olKernelGetMaxGroupSizeTest, NullOutput) {
- ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
- olGetKernelMaxGroupSize(Device, Kernel, 0, nullptr));
-}
More information about the llvm-commits
mailing list