[llvm-branch-commits] [llvm] [Offload] Allow "tagging" device info entries with offload keys (PR #147317)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jul 7 08:17:59 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Ross Brunton (RossBrunton)
<details>
<summary>Changes</summary>
When generating the device info tree, a node can now be tagged with an
offload `DeviceInfo` key when it is added. A parent node can then look up
a tagged child directly by that key, in addition to looking it up by name.
---
Full diff: https://github.com/llvm/llvm-project/pull/147317.diff
3 Files Affected:
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+7-4)
- (modified) offload/plugins-nextgen/common/include/PluginInterface.h (+24-3)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+5-3)
``````````diff
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 832c31c43b5d2..52ea3283b24ef 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2562,7 +2562,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor);
if (Status == HSA_STATUS_SUCCESS && Status2 == HSA_STATUS_SUCCESS)
Info.add("HSA Runtime Version",
- std::to_string(Major) + "." + std::to_string(Minor));
+ std::to_string(Major) + "." + std::to_string(Minor), "",
+ DeviceInfo::DRIVER_VERSION);
Info.add("HSA OpenMP Device Number", DeviceId);
@@ -2572,11 +2573,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_NAME, TmpChar);
if (Status == HSA_STATUS_SUCCESS)
- Info.add("Device Name", TmpChar);
+ Info.add("Device Name", TmpChar, "", DeviceInfo::NAME);
Status = getDeviceAttrRaw(HSA_AGENT_INFO_VENDOR_NAME, TmpChar);
if (Status == HSA_STATUS_SUCCESS)
- Info.add("Vendor Name", TmpChar);
+ Info.add("Vendor Name", TmpChar, "", DeviceInfo::VENDOR);
hsa_device_type_t DevType;
Status = getDeviceAttrRaw(HSA_AGENT_INFO_DEVICE, DevType);
@@ -2652,7 +2653,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
- auto &MaxSize = *Info.add("Workgroup Max Size per Dimension");
+ auto &MaxSize =
+ *Info.add("Workgroup Max Size per Dimension", std::monostate{}, "",
+ DeviceInfo::MAX_WORK_GROUP_SIZE);
MaxSize.add("x", WorkgrpMaxDim[0]);
MaxSize.add("y", WorkgrpMaxDim[1]);
MaxSize.add("z", WorkgrpMaxDim[2]);
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index b5addc13d6644..9dc01ca0277fe 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -113,6 +113,12 @@ struct AsyncInfoWrapperTy {
__tgt_async_info *AsyncInfoPtr;
};
+enum class DeviceInfo {
+#define OFFLOAD_DEVINFO(Name, _, Value) Name = Value,
+#include "OffloadInfo.inc"
+#undef OFFLOAD_DEVINFO
+};
+
/// Tree node for device information
///
/// This information is either printed or used by liboffload to extract certain
@@ -133,6 +139,8 @@ struct InfoTreeNode {
// * The same key can appear multiple times
std::unique_ptr<llvm::SmallVector<InfoTreeNode, 8>> Children;
+ std::map<DeviceInfo, size_t> DeviceInfoMap;
+
InfoTreeNode() : InfoTreeNode("", std::monostate{}, "") {}
InfoTreeNode(std::string Key, VariantType Value, std::string Units)
: Key(Key), Value(Value), Units(Units) {}
@@ -140,10 +148,12 @@ struct InfoTreeNode {
/// Add a new info entry as a child of this node. The entry requires at least
/// a key string in \p Key. The value in \p Value is optional and can be any
/// type that is representable as a string. The units in \p Units is optional
- /// and must be a string.
+ /// and must be a string. Providing a device info key allows liboffload to
+ /// use that value for an appropriate olGetDeviceInfo query
template <typename T = std::monostate>
InfoTreeNode *add(std::string Key, T Value = T(),
- const std::string &Units = std::string()) {
+ const std::string &Units = std::string(),
+ std::optional<DeviceInfo> DeviceInfoKey = std::nullopt) {
assert(!Key.empty() && "Invalid info key");
if (!Children)
@@ -157,7 +167,12 @@ struct InfoTreeNode {
else
ValueVariant = std::string{Value};
- return &Children->emplace_back(Key, ValueVariant, Units);
+ auto Ptr = &Children->emplace_back(Key, ValueVariant, Units);
+
+ if (DeviceInfoKey)
+ DeviceInfoMap[*DeviceInfoKey] = Children->size() - 1;
+
+ return Ptr;
}
std::optional<InfoTreeNode *> get(StringRef Key) {
@@ -171,6 +186,12 @@ struct InfoTreeNode {
return It;
}
+ std::optional<InfoTreeNode *> get(DeviceInfo Info) {
+ if (DeviceInfoMap.count(Info))
+ return &(*Children)[DeviceInfoMap[Info]];
+ return std::nullopt;
+ }
+
/// Print all info entries in the tree
void print() const {
// Fake an additional indent so that values are offset from the keys
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index f1164074f9ea9..2365aa200f584 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -932,13 +932,14 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Res == CUDA_SUCCESS)
// For consistency with other drivers, store the version as a string
// rather than an integer
- Info.add("CUDA Driver Version", std::to_string(TmpInt));
+ Info.add("CUDA Driver Version", std::to_string(TmpInt), "",
+ DeviceInfo::DRIVER_VERSION);
Info.add("CUDA OpenMP Device Number", DeviceId);
Res = cuDeviceGetName(TmpChar, 1000, Device);
if (Res == CUDA_SUCCESS)
- Info.add("Device Name", TmpChar);
+ Info.add("Device Name", TmpChar, "", DeviceInfo::NAME);
Res = cuDeviceTotalMem(&TmpSt, Device);
if (Res == CUDA_SUCCESS)
@@ -973,7 +974,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Res == CUDA_SUCCESS)
Info.add("Maximum Threads per Block", TmpInt);
- auto &MaxBlock = *Info.add("Maximum Block Dimensions", "");
+ auto &MaxBlock = *Info.add("Maximum Block Dimensions", std::monostate{}, "",
+ DeviceInfo::MAX_WORK_GROUP_SIZE);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, TmpInt);
if (Res == CUDA_SUCCESS)
MaxBlock.add("x", TmpInt);
``````````
</details>
https://github.com/llvm/llvm-project/pull/147317
More information about the llvm-branch-commits
mailing list