[llvm] [Offload] Replace device info queue with a tree (PR #144050)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 13 03:15:32 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-offload
Author: Ross Brunton (RossBrunton)
<details>
<summary>Changes</summary>
Previously, device info was returned as a queue in which each element
carried a "Level" field indicating its nesting depth. This patch replaces
that queue with a more conventional tree-like structure.
This should not result in a change to the output of
`llvm-offload-device-info`.
---
Full diff: https://github.com/llvm/llvm-project/pull/144050.diff
6 Files Affected:
- (modified) offload/liboffload/src/OffloadImpl.cpp (+4-11)
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+23-22)
- (modified) offload/plugins-nextgen/common/include/PluginInterface.h (+81-58)
- (modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (+3-3)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+11-10)
- (modified) offload/plugins-nextgen/host/src/rtl.cpp (+3-2)
``````````diff
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 0a784cddeaecb..770c212d804d2 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -229,26 +229,19 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
// Find the info if it exists under any of the given names
auto GetInfo = [&](std::vector<std::string> Names) {
- InfoQueueTy DevInfo;
if (Device == HostDevice())
return std::string("Host");
if (!Device->Device)
return std::string("");
- if (auto Err = Device->Device->obtainInfoImpl(DevInfo))
+ auto Info = Device->Device->obtainInfoImpl();
+ if (auto Err = Info.takeError())
return std::string("");
for (auto Name : Names) {
- auto InfoKeyMatches = [&](const InfoQueueTy::InfoQueueEntryTy &Info) {
- return Info.Key == Name;
- };
- auto Item = std::find_if(DevInfo.getQueue().begin(),
- DevInfo.getQueue().end(), InfoKeyMatches);
-
- if (Item != std::end(DevInfo.getQueue())) {
- return Item->Value;
- }
+ if (auto Entry = Info->get(Name))
+ return (*Entry)->Value;
}
return std::string("");
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index e4c32713e2c15..73e1e66928fac 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2551,7 +2551,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}
/// Print information about the device.
- Error obtainInfoImpl(InfoQueueTy &Info) override {
+ Expected<InfoTreeNode> obtainInfoImpl() override {
char TmpChar[1000];
const char *TmpCharPtr = "Unknown";
uint16_t Major, Minor;
@@ -2562,6 +2562,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
uint16_t WorkgrpMaxDim[3];
hsa_dim3_t GridMaxDim;
hsa_status_t Status, Status2;
+ InfoTreeNode Info;
Status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &Major);
Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor);
@@ -2617,11 +2618,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// runtime.
Status = getDeviceAttrRaw(HSA_AGENT_INFO_CACHE_SIZE, CacheSize);
if (Status == HSA_STATUS_SUCCESS) {
- Info.add("Cache");
+ auto &Cache = *Info.add("Cache");
for (int I = 0; I < 4; I++)
if (CacheSize[I])
- Info.add<InfoLevel2>("L" + std::to_string(I), CacheSize[I]);
+ Cache.add("L" + std::to_string(I), CacheSize[I]);
}
Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_CACHELINE_SIZE, TmpUInt);
@@ -2654,10 +2655,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
- Info.add("Workgroup Max Size per Dimension");
- Info.add<InfoLevel2>("x", WorkgrpMaxDim[0]);
- Info.add<InfoLevel2>("y", WorkgrpMaxDim[1]);
- Info.add<InfoLevel2>("z", WorkgrpMaxDim[2]);
+ auto &MaxSize = *Info.add("Workgroup Max Size per Dimension");
+ MaxSize.add("x", WorkgrpMaxDim[0]);
+ MaxSize.add("y", WorkgrpMaxDim[1]);
+ MaxSize.add("z", WorkgrpMaxDim[2]);
}
Status = getDeviceAttrRaw(
@@ -2673,17 +2674,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_DIM, GridMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
- Info.add("Grid Max Size per Dimension");
- Info.add<InfoLevel2>("x", GridMaxDim.x);
- Info.add<InfoLevel2>("y", GridMaxDim.y);
- Info.add<InfoLevel2>("z", GridMaxDim.z);
+ auto &MaxDim = *Info.add("Grid Max Size per Dimension");
+ MaxDim.add("x", GridMaxDim.x);
+ MaxDim.add("y", GridMaxDim.y);
+ MaxDim.add("z", GridMaxDim.z);
}
Status = getDeviceAttrRaw(HSA_AGENT_INFO_FBARRIER_MAX_SIZE, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
Info.add("Max fbarriers/Workgrp", TmpUInt);
- Info.add("Memory Pools");
+ auto &RootPool = *Info.add("Memory Pools");
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
std::string TmpStr, TmpStr2;
@@ -2698,7 +2699,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
else
TmpStr = "Unknown";
- Info.add<InfoLevel2>(std::string("Pool ") + TmpStr);
+ auto &PoolNode = *RootPool.add(std::string("Pool ") + TmpStr);
if (Pool->isGlobal()) {
if (Pool->isFineGrained())
@@ -2708,39 +2709,39 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Pool->supportsKernelArgs())
TmpStr2 += "Kernarg ";
- Info.add<InfoLevel3>("Flags", TmpStr2);
+ PoolNode.add("Flags", TmpStr2);
}
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, TmpSt);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel3>("Size", TmpSt, "bytes");
+ PoolNode.add("Size", TmpSt, "bytes");
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
TmpBool);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel3>("Allocatable", TmpBool);
+ PoolNode.add("Allocatable", TmpBool);
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
TmpSt);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel3>("Runtime Alloc Granule", TmpSt, "bytes");
+ PoolNode.add("Runtime Alloc Granule", TmpSt, "bytes");
Status = Pool->getAttrRaw(
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, TmpSt);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel3>("Runtime Alloc Alignment", TmpSt, "bytes");
+ PoolNode.add("Runtime Alloc Alignment", TmpSt, "bytes");
Status =
Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, TmpBool);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel3>("Accessible by all", TmpBool);
+ PoolNode.add("Accessible by all", TmpBool);
}
- Info.add("ISAs");
+ auto &ISAs = *Info.add("ISAs");
auto Err = hsa_utils::iterateAgentISAs(getAgent(), [&](hsa_isa_t ISA) {
Status = hsa_isa_get_info_alt(ISA, HSA_ISA_INFO_NAME, TmpChar);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel2>("Name", TmpChar);
+ ISAs.add("Name", TmpChar);
return Status;
});
@@ -2749,7 +2750,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Err)
consumeError(std::move(Err));
- return Plugin::success();
+ return Info;
}
/// Returns true if auto zero-copy the best configuration for the current
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index d2437908a0a6f..f5d995532b7a5 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -112,77 +112,100 @@ struct AsyncInfoWrapperTy {
__tgt_async_info *AsyncInfoPtr;
};
-/// The information level represents the level of a key-value property in the
-/// info tree print (i.e. indentation). The first level should be the default.
-enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 };
-
-/// Class for storing device information and later be printed. An object of this
-/// type acts as a queue of key-value properties. Each property has a key, a
-/// a value, and an optional unit for the value. For printing purposes, the
-/// information can be classified into several levels. These levels are useful
-/// for defining sections and subsections. Thus, each key-value property also
-/// has an additional field indicating to which level belongs to. Notice that
-/// we use the level to determine the indentation of the key-value property at
-/// printing time. See the enum InfoLevelKind for the list of accepted levels.
-class InfoQueueTy {
-public:
- struct InfoQueueEntryTy {
- std::string Key;
- std::string Value;
- std::string Units;
- uint64_t Level;
- };
-
-private:
- std::deque<InfoQueueEntryTy> Queue;
-
-public:
- /// Add a new info entry to the queue. The entry requires at least a key
- /// string in \p Key. The value in \p Value is optional and can be any type
- /// that is representable as a string. The units in \p Units is optional and
- /// must be a string. The info level is a template parameter that defaults to
- /// the first level (top level).
- template <InfoLevelKind L = InfoLevel1, typename T = std::string>
- void add(const std::string &Key, T Value = T(),
- const std::string &Units = std::string()) {
+/// Tree node for device information
+///
+/// This information is either printed or used by liboffload to extract certain
+/// device queries. Each property has an optional key, an optional value
+/// and optional children. The children can be used to store additional
+/// information (such as x, y and z components of ranges).
+struct InfoTreeNode {
+ static constexpr uint64_t IndentSize = 4;
+
+ std::string Key;
+ std::string Value;
+ std::string Units;
+ // Need to specify a default value number of elements here as `InfoTreeNode`'s
+ // size is unknown. This is a vector (rather than a Key->Value map) since:
+ // * The keys need to be owned and thus `std::string`s
+ // * The order of keys is important
+ // * The same key can appear multiple times
+ std::unique_ptr<llvm::SmallVector<InfoTreeNode, 8>> Children;
+
+ InfoTreeNode() : InfoTreeNode("", "", "") {}
+ InfoTreeNode(std::string Key, std::string Value, std::string Units)
+ : Key(Key), Value(Value), Units(Units) {}
+
+ /// Add a new info entry as a child of this node. The entry requires at least
+ /// a key string in \p Key. The value in \p Value is optional and can be any
+ /// type that is representable as a string. The units in \p Units is optional
+ /// and must be a string.
+ template <typename T = std::string>
+ InfoTreeNode *add(std::string Key, T Value = T(),
+ const std::string &Units = std::string()) {
assert(!Key.empty() && "Invalid info key");
- // Convert the value to a string depending on its type.
+ if (!Children)
+ Children = std::make_unique<llvm::SmallVector<InfoTreeNode, 8>>();
+
+ std::string ValueStr;
if constexpr (std::is_same_v<T, bool>)
- Queue.push_back({Key, Value ? "Yes" : "No", Units, L});
+ ValueStr = Value ? "Yes" : "No";
else if constexpr (std::is_arithmetic_v<T>)
- Queue.push_back({Key, std::to_string(Value), Units, L});
+ ValueStr = std::to_string(Value);
else
- Queue.push_back({Key, Value, Units, L});
+ ValueStr = Value;
+
+ return &Children->emplace_back(Key, ValueStr, Units);
}
- const std::deque<InfoQueueEntryTy> &getQueue() const { return Queue; }
+ std::optional<InfoTreeNode *> get(StringRef Key) {
+ if (!Children)
+ return std::nullopt;
- /// Print all info entries added to the queue.
- void print() const {
- // We print four spances for each level.
- constexpr uint64_t IndentSize = 4;
+ auto It = std::find_if(Children->begin(), Children->end(),
+ [&](auto &V) { return V.Key == Key; });
+ if (It == Children->end())
+ return std::nullopt;
+ return It;
+ }
- // Find the maximum key length (level + key) to compute the individual
- // indentation of each entry.
- uint64_t MaxKeySize = 0;
- for (const auto &Entry : Queue) {
- uint64_t KeySize = Entry.Key.size() + Entry.Level * IndentSize;
- if (KeySize > MaxKeySize)
- MaxKeySize = KeySize;
- }
+ /// Print all info entries in the tree
+ void print() const {
+ // Fake an additional indent so that values are offset from the keys
+ doPrint(0, maxKeySize(1));
+ }
- // Print all info entries.
- for (const auto &Entry : Queue) {
+private:
+ void doPrint(int Level, uint64_t MaxKeySize) const {
+ if (Key.size()) {
// Compute the indentations for the current entry.
- uint64_t KeyIndentSize = Entry.Level * IndentSize;
+ uint64_t KeyIndentSize = Level * IndentSize;
uint64_t ValIndentSize =
- MaxKeySize - (Entry.Key.size() + KeyIndentSize) + IndentSize;
+ MaxKeySize - (Key.size() + KeyIndentSize) + IndentSize;
- llvm::outs() << std::string(KeyIndentSize, ' ') << Entry.Key
- << std::string(ValIndentSize, ' ') << Entry.Value
- << (Entry.Units.empty() ? "" : " ") << Entry.Units << "\n";
+ llvm::outs() << std::string(KeyIndentSize, ' ') << Key
+ << std::string(ValIndentSize, ' ') << Value
+ << (Units.empty() ? "" : " ") << Units << "\n";
}
+
+ // Print children
+ if (Children)
+ for (const auto &Entry : *Children)
+ Entry.doPrint(Level + 1, MaxKeySize);
+ }
+
+ // Recursively calculates the maximum width of each key, including indentation
+ uint64_t maxKeySize(int Level) const {
+ uint64_t MaxKeySize = 0;
+
+ if (Children)
+ for (const auto &Entry : *Children) {
+ uint64_t KeySize = Entry.Key.size() + Level * IndentSize;
+ MaxKeySize = std::max(MaxKeySize, KeySize);
+ MaxKeySize = std::max(MaxKeySize, Entry.maxKeySize(Level + 1));
+ }
+
+ return MaxKeySize;
}
};
@@ -871,7 +894,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Print information about the device.
Error printInfo();
- virtual Error obtainInfoImpl(InfoQueueTy &Info) = 0;
+ virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
/// Getters of the grid values.
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index f9a6b3c1f4324..6fd3405d03afa 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1578,14 +1578,14 @@ Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
}
Error GenericDeviceTy::printInfo() {
- InfoQueueTy InfoQueue;
+ auto Info = obtainInfoImpl();
// Get the vendor-specific info entries describing the device properties.
- if (auto Err = obtainInfoImpl(InfoQueue))
+ if (auto Err = Info.takeError())
return Err;
// Print all info entries.
- InfoQueue.print();
+ Info->print();
return Plugin::success();
}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 44ccfc47a21c9..9943f533ef5a8 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -922,11 +922,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
}
/// Print information about the device.
- Error obtainInfoImpl(InfoQueueTy &Info) override {
+ Expected<InfoTreeNode> obtainInfoImpl() override {
char TmpChar[1000];
const char *TmpCharPtr;
size_t TmpSt;
int TmpInt;
+ InfoTreeNode Info;
CUresult Res = cuDriverGetVersion(&TmpInt);
if (Res == CUDA_SUCCESS)
@@ -971,27 +972,27 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Res == CUDA_SUCCESS)
Info.add("Maximum Threads per Block", TmpInt);
- Info.add("Maximum Block Dimensions", "");
+ auto &MaxBlock = *Info.add("Maximum Block Dimensions", "");
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("x", TmpInt);
+ MaxBlock.add("x", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("y", TmpInt);
+ MaxBlock.add("y", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("z", TmpInt);
+ MaxBlock.add("z", TmpInt);
- Info.add("Maximum Grid Dimensions", "");
+ auto &MaxGrid = *Info.add("Maximum Grid Dimensions", "");
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("x", TmpInt);
+ MaxGrid.add("x", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("y", TmpInt);
+ MaxGrid.add("y", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("z", TmpInt);
+ MaxGrid.add("z", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_PITCH, TmpInt);
if (Res == CUDA_SUCCESS)
@@ -1087,7 +1088,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
Info.add("Compute Capabilities", ComputeCapability.str());
- return Plugin::success();
+ return Info;
}
virtual bool shouldSetupDeviceMemoryPool() const override {
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 9916f4d0ab250..ced9208acaedc 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -326,9 +326,10 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
Error syncEventImpl(void *EventPtr) override { return Plugin::success(); }
/// Print information about the device.
- Error obtainInfoImpl(InfoQueueTy &Info) override {
+ Expected<InfoTreeNode> obtainInfoImpl() override {
+ InfoTreeNode Info;
Info.add("Device Type", "Generic-elf-64bit");
- return Plugin::success();
+ return Info;
}
/// This plugin should not setup the device environment or memory pool.
``````````
</details>
https://github.com/llvm/llvm-project/pull/144050
More information about the llvm-commits
mailing list