[llvm] [Offload] Add `OL_DEVICE_INFO_MAX_WORK_SIZE[_PER_DIMENSION]` (PR #155823)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 28 05:04:23 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Ross Brunton (RossBrunton)
<details>
<summary>Changes</summary>
This is the total number of work items that the device supports across an
entire kernel launch (the equivalent `MAX_WORK_GROUP_SIZE[_PER_DIMENSION]`
properties cover only a single work group).
---
Full diff: https://github.com/llvm/llvm-project/pull/155823.diff
6 Files Affected:
- (modified) offload/liboffload/API/Device.td (+2)
- (modified) offload/liboffload/src/OffloadImpl.cpp (+9)
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+3-2)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+7-1)
- (modified) offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp (+13)
- (modified) offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp (+10)
``````````diff
diff --git a/offload/liboffload/API/Device.td b/offload/liboffload/API/Device.td
index d1baa28687fb2..f89f465e3e929 100644
--- a/offload/liboffload/API/Device.td
+++ b/offload/liboffload/API/Device.td
@@ -32,6 +32,8 @@ def ol_device_info_t : Enum {
TaggedEtor<"DRIVER_VERSION", "char[]", "Driver version">,
TaggedEtor<"MAX_WORK_GROUP_SIZE", "uint32_t", "Maximum total work group size in work items">,
TaggedEtor<"MAX_WORK_GROUP_SIZE_PER_DIMENSION", "ol_dimensions_t", "Maximum work group size in each dimension">,
+ TaggedEtor<"MAX_WORK_SIZE", "uint32_t", "Maximum total work items">,
+ TaggedEtor<"MAX_WORK_SIZE_PER_DIMENSION", "ol_dimensions_t", "Maximum work items in each dimension">,
TaggedEtor<"VENDOR_ID", "uint32_t", "A unique vendor device identifier assigned by PCI-SIG">,
TaggedEtor<"NUM_COMPUTE_UNITS", "uint32_t", "The number of parallel compute units available to the device">,
TaggedEtor<"MAX_CLOCK_FREQUENCY", "uint32_t", "The maximum configured clock frequency of this device in MHz">,
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 9d342e06127a2..bd3e45034fa55 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -377,6 +377,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
}
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE:
+ case OL_DEVICE_INFO_MAX_WORK_SIZE:
case OL_DEVICE_INFO_VENDOR_ID:
case OL_DEVICE_INFO_NUM_COMPUTE_UNITS:
case OL_DEVICE_INFO_ADDRESS_BITS:
@@ -393,6 +394,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
return Info.write(static_cast<uint32_t>(Value));
}
+ case OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION:
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION: {
// {x, y, z} triples
ol_dimensions_t Out{0, 0, 0};
@@ -431,6 +433,8 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
assert(Device == OffloadContext::get().HostDevice());
InfoWriter Info(PropSize, PropValue, PropSizeRet);
+ constexpr auto uint32_max = std::numeric_limits<uint32_t>::max();
+
switch (PropName) {
case OL_DEVICE_INFO_PLATFORM:
return Info.write<void *>(Device->Platform);
@@ -446,6 +450,11 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
return Info.write<uint32_t>(1);
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION:
return Info.write<ol_dimensions_t>(ol_dimensions_t{1, 1, 1});
+ case OL_DEVICE_INFO_MAX_WORK_SIZE:
+ return Info.write<uint32_t>(uint32_max);
+ case OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION:
+ return Info.write<ol_dimensions_t>(
+ ol_dimensions_t{uint32_max, uint32_max, uint32_max});
case OL_DEVICE_INFO_VENDOR_ID:
return Info.write<uint32_t>(0);
case OL_DEVICE_INFO_NUM_COMPUTE_UNITS:
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 5f397436dffd1..bb62799f8da80 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2924,11 +2924,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_SIZE, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
- Info.add("Grid Max Size", TmpUInt);
+ Info.add("Grid Max Size", TmpUInt, "", DeviceInfo::MAX_WORK_SIZE);
Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_DIM, GridMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
- auto &MaxDim = *Info.add("Grid Max Size per Dimension");
+ auto &MaxDim = *Info.add("Grid Max Size per Dimension", std::monostate{},
+ "", DeviceInfo::MAX_WORK_SIZE_PER_DIMENSION);
MaxDim.add("x", GridMaxDim.x);
MaxDim.add("y", GridMaxDim.y);
MaxDim.add("z", GridMaxDim.z);
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index c7984287f7533..e4d2e41af53a1 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1118,7 +1118,13 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Res == CUDA_SUCCESS)
MaxBlock.add("z", TmpInt);
- auto &MaxGrid = *Info.add("Maximum Grid Dimensions", "");
+ // TODO: I assume CUDA devices have no limit on the amount of threads,
+ // verify this
+ Info.add("Maximum Grid Size", std::numeric_limits<uint32_t>::max(), "",
+ DeviceInfo::MAX_WORK_SIZE);
+
+ auto &MaxGrid = *Info.add("Maximum Grid Dimensions", std::monostate{}, "",
+ DeviceInfo::MAX_WORK_SIZE_PER_DIMENSION);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, TmpInt);
if (Res == CUDA_SUCCESS)
MaxGrid.add("x", TmpInt);
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
index 212a5d6ddf228..658f7b5434852 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
@@ -122,6 +122,19 @@ TEST_P(olGetDeviceInfoTest, SuccessMaxWorkGroupSizePerDimension) {
ASSERT_GT(Value.z, 0u);
}
+OL_DEVICE_INFO_TEST_VALUE_GT(MaxWorkSize, uint32_t,
+ OL_DEVICE_INFO_MAX_WORK_SIZE, 0);
+
+TEST_P(olGetDeviceInfoTest, SuccessMaxWorkSizePerDimension) {
+ ol_dimensions_t Value{0, 0, 0};
+ ASSERT_SUCCESS(olGetDeviceInfo(Device,
+ OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION,
+ sizeof(Value), &Value));
+ ASSERT_GT(Value.x, 0u);
+ ASSERT_GT(Value.y, 0u);
+ ASSERT_GT(Value.z, 0u);
+}
+
OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(VendorId, uint32_t,
OL_DEVICE_INFO_VENDOR_ID, 0);
OL_DEVICE_INFO_TEST_HOST_SUCCESS(VendorId, uint32_t, OL_DEVICE_INFO_VENDOR_ID);
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
index a28089d918e03..796b711f0576b 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
@@ -35,6 +35,8 @@ OL_DEVICE_INFO_SIZE_TEST_NONZERO(Vendor, OL_DEVICE_INFO_VENDOR);
OL_DEVICE_INFO_SIZE_TEST_NONZERO(DriverVersion, OL_DEVICE_INFO_DRIVER_VERSION);
OL_DEVICE_INFO_SIZE_TEST_EQ(MaxWorkGroupSize, uint32_t,
OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE);
+OL_DEVICE_INFO_SIZE_TEST_EQ(MaxWorkSize, uint32_t,
+ OL_DEVICE_INFO_MAX_WORK_SIZE);
OL_DEVICE_INFO_SIZE_TEST_EQ(VendorId, uint32_t, OL_DEVICE_INFO_VENDOR_ID);
OL_DEVICE_INFO_SIZE_TEST_EQ(NumComputeUnits, uint32_t,
OL_DEVICE_INFO_NUM_COMPUTE_UNITS);
@@ -76,6 +78,14 @@ TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkGroupSizePerDimension) {
ASSERT_EQ(Size, sizeof(uint32_t) * 3);
}
+TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkSizePerDimension) {
+ size_t Size = 0;
+ ASSERT_SUCCESS(olGetDeviceInfoSize(
+ Device, OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION, &Size));
+ ASSERT_EQ(Size, sizeof(ol_dimensions_t));
+ ASSERT_EQ(Size, sizeof(uint32_t) * 3);
+}
+
TEST_P(olGetDeviceInfoSizeTest, InvalidNullHandle) {
size_t Size = 0;
ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
``````````
</details>
https://github.com/llvm/llvm-project/pull/155823
More information about the llvm-commits
mailing list