[llvm] [Offload] Port llvm-offload-device-info to new offload API (PR #155626)

Ross Brunton via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 2 05:07:46 PDT 2025


================
@@ -1,32 +1,273 @@
-//===- llvm-offload-device-info.cpp - Device info as seen by LLVM/Offload -===//
+//===- llvm-offload-device-info.cpp - Print liboffload properties ---------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
-// This is a command line utility that, by using LLVM/Offload, and the device
-// plugins, list devices information as seen by the runtime.
+// This is a command line utility that, by using the new liboffload API, prints
+// all devices and properties
 //
 //===----------------------------------------------------------------------===//
 
-#include "omptarget.h"
-#include <cstdio>
+#include <OffloadAPI.h>
+#include <iostream>
+#include <vector>
 
-int main(int argc, char **argv) {
-  __tgt_bin_desc EmptyDesc = {0, nullptr, nullptr, nullptr};
-  __tgt_register_lib(&EmptyDesc);
-  __tgt_init_all_rtls();
+#define OFFLOAD_ERR(X)                                                         \
+  if (auto Err = X) {                                                          \
+    return Err;                                                                \
+  }
+
+enum class PrintKind {
+  NORMAL,
+  FP_FLAGS,
+};
+
+template <typename T, PrintKind PK = PrintKind::NORMAL>
+void doWrite(std::ostream &S, T &&Val) {
+  S << Val;
+}
+
+template <>
+void doWrite<ol_platform_backend_t>(std::ostream &S,
+                                    ol_platform_backend_t &&Val) {
+  switch (Val) {
+  case OL_PLATFORM_BACKEND_UNKNOWN:
+    S << "UNKNOWN";
+    break;
+  case OL_PLATFORM_BACKEND_CUDA:
+    S << "CUDA";
+    break;
+  case OL_PLATFORM_BACKEND_AMDGPU:
+    S << "AMDGPU";
+    break;
+  case OL_PLATFORM_BACKEND_HOST:
+    S << "HOST";
+    break;
+  default:
+    S << "<< INVALID >>";
+    break;
+  }
+}
+template <>
+void doWrite<ol_device_type_t>(std::ostream &S, ol_device_type_t &&Val) {
+  switch (Val) {
+  case OL_DEVICE_TYPE_GPU:
+    S << "GPU";
+    break;
+  case OL_DEVICE_TYPE_CPU:
+    S << "CPU";
+    break;
+  case OL_DEVICE_TYPE_HOST:
+    S << "HOST";
+    break;
+  default:
+    S << "<< INVALID >>";
+    break;
+  }
+}
+template <>
+void doWrite<ol_dimensions_t>(std::ostream &S, ol_dimensions_t &&Val) {
+  S << "{x: " << Val.x << ", y: " << Val.y << ", z: " << Val.z << "}";
+}
+template <>
+void doWrite<ol_device_fp_capability_flags_t, PrintKind::FP_FLAGS>(
+    std::ostream &S, ol_device_fp_capability_flags_t &&Val) {
+  S << Val << " {";
+
+  if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT) {
+    S << " CORRECTLY_ROUNDED_DIVIDE_SQRT";
+  }
+  if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST) {
+    S << " ROUND_TO_NEAREST";
+  }
+  if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO) {
+    S << " ROUND_TO_ZERO";
+  }
+  if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF) {
+    S << " ROUND_TO_INF";
+  }
+  if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_INF_NAN) {
+    S << " INF_NAN";
+  }
+  if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_DENORM) {
+    S << " DENORM";
+  }
+  if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_FMA) {
+    S << " FMA";
+  }
+  if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT) {
+    S << " SOFT_FLOAT";
+  }
+
+  S << " }";
+}
 
-  printf("Found %d devices:\n", omp_get_num_devices());
-  for (int Dev = 0; Dev < omp_get_num_devices(); Dev++) {
-    printf("  Device %d:\n", Dev);
-    if (!__tgt_print_device_info(Dev))
----------------
RossBrunton wrote:

Yes, that's why I originally had both binaries co-exist. I'm not sure what the best approach is here - a lot of the properties are platform-specific things like AMD's "SIMDs per CU", which doesn't apply to Nvidia. Having a long list of `DEVICE_INFO_AMD_SIMDS_PER_CU`-style properties would probably work, but it feels a bit messy to me.

The only remaining device info that looks to be shared between AMD and Nvidia is cache information. However, I'm not sure whether what AMD calls "L1 cache" is the same as what Nvidia calls "L1 cache", and devices may not provide caching information at a given level.

https://github.com/llvm/llvm-project/pull/155626


More information about the llvm-commits mailing list