[libc] [llvm] [LLVM] Port 'llvm-gpu-loader' to use LLVMOffload (PR #162739)

Fri Oct 10 09:10:39 PDT 2025

================
@@ -1,108 +1,177 @@
-//===-- Generic device loader interface -----------------------------------===//
+//===-- Dynamically loaded offload API ------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+//
+// Dynamically loads the API provided by the LLVMOffload library. We do this at
+// run time because this tool is used before that library is actually built and
+// should be provided even when the user did not specify the offload runtime.
+//
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_TOOLS_LLVM_GPU_LOADER_LLVM_GPU_LOADER_H
 #define LLVM_TOOLS_LLVM_GPU_LOADER_LLVM_GPU_LOADER_H
 
-#include <cstddef>
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-
-/// Generic launch parameters for configuration the number of blocks / threads.
-struct LaunchParameters {
-  uint32_t num_threads_x;
-  uint32_t num_threads_y;
-  uint32_t num_threads_z;
-  uint32_t num_blocks_x;
-  uint32_t num_blocks_y;
-  uint32_t num_blocks_z;
-};
-
-/// The arguments to the '_begin' kernel.
-struct begin_args_t {
-  int argc;
-  void *argv;
-  void *envp;
-};
-
-/// The arguments to the '_start' kernel.
-struct start_args_t {
-  int argc;
-  void *argv;
-  void *envp;
-  void *ret;
-};
-
-/// The arguments to the '_end' kernel.
-struct end_args_t {};
-
-/// Generic interface to load the \p image and launch execution of the _start
-/// kernel on the target device. Copies \p argc and \p argv to the device.
-/// Returns the final value of the `main` function on the device.
-#ifdef AMDHSA_SUPPORT
-int load_amdhsa(int argc, const char **argv, const char **evnp, void *image,
-                size_t size, const LaunchParameters &params,
-                bool print_resource_usage);
-#endif
-#ifdef NVPTX_SUPPORT
-int load_nvptx(int argc, const char **argv, const char **evnp, void *image,
-               size_t size, const LaunchParameters &params,
-               bool print_resource_usage);
-#endif
-
-/// Return \p V aligned "upwards" according to \p Align.
-template <typename V, typename A> inline V align_up(V val, A align) {
-  return ((val + V(align) - 1) / V(align)) * V(align);
-}
-
-/// Copy the system's argument vector to GPU memory allocated using \p alloc.
-template <typename Allocator>
-void *copy_argument_vector(int argc, const char **argv, Allocator alloc) {
-  size_t argv_size = sizeof(char *) * (argc + 1);
-  size_t str_size = 0;
-  for (int i = 0; i < argc; ++i)
-    str_size += strlen(argv[i]) + 1;
-
-  // We allocate enough space for a null terminated array and all the strings.
-  void *dev_argv = alloc(argv_size + str_size);
-  if (!dev_argv)
-    return nullptr;
-
-  // Store the strings linerally in the same memory buffer.
-  void *dev_str = reinterpret_cast<uint8_t *>(dev_argv) + argv_size;
-  for (int i = 0; i < argc; ++i) {
-    size_t size = strlen(argv[i]) + 1;
-    std::memcpy(dev_str, argv[i], size);
-    static_cast<void **>(dev_argv)[i] = dev_str;
-    dev_str = reinterpret_cast<uint8_t *>(dev_str) + size;
-  }
-
-  // Ensure the vector is null terminated.
-  reinterpret_cast<void **>(dev_argv)[argc] = nullptr;
-  return dev_argv;
-}
-
-/// Copy the system's environment to GPU memory allocated using \p alloc.
-template <typename Allocator>
-void *copy_environment(const char **envp, Allocator alloc) {
-  int envc = 0;
-  for (const char **env = envp; *env != 0; ++env)
-    ++envc;
-
-  return copy_argument_vector(envc, envp, alloc);
-}
-
-inline void handle_error_impl(const char *file, int32_t line, const char *msg) {
-  fprintf(stderr, "%s:%d:0: Error: %s\n", file, line, msg);
-  exit(EXIT_FAILURE);
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Error.h"
+
+typedef enum ol_alloc_type_t { // Allocation-type selector, declared locally for the dynamically loaded offload API.
+  OL_ALLOC_TYPE_HOST = 0, // Presumably selects host memory -- confirm against the LLVMOffload definition.
+  OL_ALLOC_TYPE_DEVICE = 1, // Presumably selects device memory -- confirm against the LLVMOffload definition.
+  OL_ALLOC_TYPE_FORCE_UINT32 = 0x7fffffff // Sentinel equal to INT32_MAX; presumably only widens the enum's value range -- confirm.
+} ol_alloc_type_t;
+
+typedef enum ol_device_info_t { // Device-info query selector, declared locally for the dynamically loaded offload API.
+  OL_DEVICE_INFO_TYPE = 0, // Presumably queries the device's type -- confirm against the LLVMOffload definition.
+  OL_DEVICE_INFO_PLATFORM = 1, // Presumably queries the platform owning the device -- confirm.
+  OL_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff // Sentinel equal to INT32_MAX; presumably only widens the enum's value range -- confirm.
+} ol_device_info_t;
+
+typedef enum ol_platform_info_t { // Platform-info query selector, declared locally for the dynamically loaded offload API.
+  OL_PLATFORM_INFO_NAME = 0, // Presumably queries the platform's name -- confirm against the LLVMOffload definition.
+  OL_PLATFORM_INFO_BACKEND = 3, // Values 1 and 2 are not declared here; NOTE(review): presumably omitted because unused -- verify 3 still matches the library.
+  OL_PLATFORM_INFO_FORCE_UINT32 = 0x7fffffff // Sentinel equal to INT32_MAX; presumably only widens the enum's value range -- confirm.
+} ol_platform_info_t;
+
+typedef enum ol_symbol_kind_t { // Symbol-kind selector, declared locally for the dynamically loaded offload API.
+  OL_SYMBOL_KIND_KERNEL = 0, // Presumably identifies a kernel symbol -- confirm against the LLVMOffload definition.
+  OL_SYMBOL_KIND_GLOBAL_VARIABLE = 1, // Presumably identifies a global-variable symbol -- confirm.
+  OL_SYMBOL_KIND_FORCE_UINT32 = 0x7fffffff // Sentinel equal to INT32_MAX; presumably only widens the enum's value range -- confirm.
+} ol_symbol_kind_t;
+
+typedef enum ol_errc_t { // Error-code enum, declared locally for the dynamically loaded offload API.
+  OL_ERRC_SUCCESS = 0, // The only named code declared here; no failure codes are enumerated in this header.
+  OL_ERRC_FORCE_UINT32 = 0x7fffffff // Sentinel equal to INT32_MAX; presumably only widens the enum's value range -- confirm.
+} ol_errc_t;
+
+typedef struct ol_error_struct_t { // Error record pairing a code with a message; field order presumably must match the library's layout -- confirm.
+  ol_errc_t Code; // Error code, one of the ol_errc_t values.
+  const char *Details; // C string with error details; ownership and lifetime are not visible here -- TODO confirm.
+} ol_error_struct_t;
+
+typedef struct ol_dimensions_t { // Triple of unsigned 32-bit extents, used below for NumGroups and GroupSize.
+  uint32_t x; // Extent in x.
+  uint32_t y; // Extent in y.
+  uint32_t z; // Extent in z.
+} ol_dimensions_t;
+
+typedef struct ol_kernel_launch_size_args_t { // Kernel launch-size description; field order presumably must match the library's layout -- confirm.
+  size_t Dimensions; // Presumably the number of dimensions in use -- TODO confirm against LLVMOffload.
+  struct ol_dimensions_t NumGroups; // Presumably the number of work groups per dimension -- confirm.
+  struct ol_dimensions_t GroupSize; // Presumably the size of each work group per dimension -- confirm.
+  size_t DynSharedMemory; // Presumably the dynamic shared-memory size; units (bytes?) not visible here -- confirm.
+} ol_kernel_launch_size_args_t;
+
+typedef enum ol_platform_backend_t {
+  OL_PLATFORM_BACKEND_UNKNOWN = 0,
+  OL_PLATFORM_BACKEND_CUDA = 1,
+  OL_PLATFORM_BACKEND_AMDGPU = 2,
----------------
sarnex wrote:

Should we also add an enumerator for `LEVEL_ZERO` here, even though the plugin isn't merged yet?

https://github.com/llvm/llvm-project/pull/162739