[llvm] [Offload] Rework `MAX_WORK_GROUP_SIZE` (PR #151926)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 4 02:14:31 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-offload
Author: Ross Brunton (RossBrunton)
<details>
<summary>Changes</summary>
`MAX_WORK_GROUP_SIZE` now represents the maximum total number of work
items in a single work group (e.g. CUDA's max threads per block), rather
than the maximum per dimension. `MAX_WORK_GROUP_SIZE_PER_DIMENSION` has
been added, which retains the old per-dimension behaviour.
---
Full diff: https://github.com/llvm/llvm-project/pull/151926.diff
6 Files Affected:
- (modified) offload/liboffload/API/Device.td (+2-1)
- (modified) offload/liboffload/src/OffloadImpl.cpp (+14)
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+3-2)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+3-2)
- (modified) offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp (+9-1)
- (modified) offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp (+7)
``````````diff
diff --git a/offload/liboffload/API/Device.td b/offload/liboffload/API/Device.td
index 37f4e815d90c3..857c596124b27 100644
--- a/offload/liboffload/API/Device.td
+++ b/offload/liboffload/API/Device.td
@@ -32,7 +32,8 @@ def DeviceInfo : Enum {
TaggedEtor<"NAME", "char[]", "Device name">,
TaggedEtor<"VENDOR", "char[]", "Device vendor">,
TaggedEtor<"DRIVER_VERSION", "char[]", "Driver version">,
- TaggedEtor<"MAX_WORK_GROUP_SIZE", "ol_dimensions_t", "Maximum work group size in each dimension">,
+ TaggedEtor<"MAX_WORK_GROUP_SIZE", "uint32_t", "Maximum total work group size in work items">,
+ TaggedEtor<"MAX_WORK_GROUP_SIZE_PER_DIMENSION", "ol_dimensions_t", "Maximum work group size in each dimension">,
];
}
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 2444ccdb871ac..6486b2b6d13a6 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -326,6 +326,18 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
}
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE: {
+ // Uint32 values
+ if (!std::holds_alternative<uint64_t>(Entry->Value))
+ return makeError(ErrorCode::BACKEND_FAILURE,
+ "plugin returned incorrect type");
+ auto Value = std::get<uint64_t>(Entry->Value);
+ if (Value > std::numeric_limits<uint32_t>::max())
+ return makeError(ErrorCode::BACKEND_FAILURE,
+ "plugin returned out of range device info");
+ return Info.write(static_cast<uint32_t>(Value));
+ }
+
+ case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION: {
// {x, y, z} triples
ol_dimensions_t Out{0, 0, 0};
@@ -375,6 +387,8 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
case OL_DEVICE_INFO_DRIVER_VERSION:
return Info.writeString(LLVM_VERSION_STRING);
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE:
+ return Info.write<uint64_t>(1);
+ case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION:
return Info.write<ol_dimensions_t>(ol_dimensions_t{1, 1, 1});
default:
return createOffloadError(ErrorCode::INVALID_ENUMERATION,
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index f8db9bf0ae739..b7bfa89fc9ea6 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2702,13 +2702,14 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
- Info.add("Workgroup Max Size", TmpUInt);
+ Info.add("Workgroup Max Size", TmpUInt, "",
+ DeviceInfo::MAX_WORK_GROUP_SIZE);
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
auto &MaxSize =
*Info.add("Workgroup Max Size per Dimension", std::monostate{}, "",
- DeviceInfo::MAX_WORK_GROUP_SIZE);
+ DeviceInfo::MAX_WORK_GROUP_SIZE_PER_DIMENSION);
MaxSize.add("x", WorkgrpMaxDim[0]);
MaxSize.add("y", WorkgrpMaxDim[1]);
MaxSize.add("z", WorkgrpMaxDim[2]);
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 5a391a4d36006..c5f31670079ae 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -977,10 +977,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add("Maximum Threads per Block", TmpInt);
+ Info.add("Maximum Threads per Block", TmpInt, "",
+ DeviceInfo::MAX_WORK_GROUP_SIZE);
auto &MaxBlock = *Info.add("Maximum Block Dimensions", std::monostate{}, "",
- DeviceInfo::MAX_WORK_GROUP_SIZE);
+ DeviceInfo::MAX_WORK_GROUP_SIZE_PER_DIMENSION);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, TmpInt);
if (Res == CUDA_SUCCESS)
MaxBlock.add("x", TmpInt);
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
index c534c45205993..5657320a33a29 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
@@ -78,9 +78,17 @@ TEST_P(olGetDeviceInfoTest, SuccessDriverVersion) {
}
TEST_P(olGetDeviceInfoTest, SuccessMaxWorkGroupSize) {
- ol_dimensions_t Value{0, 0, 0};
+ uint32_t Value;
ASSERT_SUCCESS(olGetDeviceInfo(Device, OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
sizeof(Value), &Value));
+ ASSERT_GT(Value, 0u);
+}
+
+TEST_P(olGetDeviceInfoTest, SuccessMaxWorkGroupSizePerDimension) {
+ ol_dimensions_t Value{0, 0, 0};
+ ASSERT_SUCCESS(
+ olGetDeviceInfo(Device, OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION,
+ sizeof(Value), &Value));
ASSERT_GT(Value.x, 0u);
ASSERT_GT(Value.y, 0u);
ASSERT_GT(Value.z, 0u);
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
index a908078a25211..4e29978fc20f0 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
@@ -48,6 +48,13 @@ TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkGroupSize) {
size_t Size = 0;
ASSERT_SUCCESS(
olGetDeviceInfoSize(Device, OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE, &Size));
+ ASSERT_EQ(Size, sizeof(uint32_t));
+}
+
+TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkGroupSizePerDimension) {
+ size_t Size = 0;
+ ASSERT_SUCCESS(olGetDeviceInfoSize(
+ Device, OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION, &Size));
ASSERT_EQ(Size, sizeof(ol_dimensions_t));
ASSERT_EQ(Size, sizeof(uint32_t) * 3);
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/151926
More information about the llvm-commits
mailing list