[libc] [llvm] Reapply "[LLVM] Make the GPU loader utilities an LLVM tool (#132096)" (PR #132277)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 20 13:04:47 PDT 2025
https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/132277
Summary:
The first attempt at landing this change had a few issues that led to errors
and warnings. Most importantly, the tool was being built with MSVC, which
isn't supported.
>From 8f5175109c460d3dad0d821e5a80547f136fcee6 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 20 Mar 2025 14:43:49 -0500
Subject: [PATCH] Reapply "[LLVM] Make the GPU loader utilities an LLVM tool
(#132096)"
Summary:
The first attempt at landing this change had a few issues that led to errors
and warnings. Most importantly, the tool was being built with MSVC, which
isn't supported.
---
libc/CMakeLists.txt | 7 --
libc/src/__support/RPC/rpc_server.h | 7 +-
libc/utils/gpu/CMakeLists.txt | 1 -
libc/utils/gpu/loader/CMakeLists.txt | 54 ---------
libc/utils/gpu/loader/amdgpu/CMakeLists.txt | 10 --
libc/utils/gpu/loader/nvptx/CMakeLists.txt | 9 --
llvm/CMakeLists.txt | 4 -
llvm/runtimes/CMakeLists.txt | 14 ---
llvm/tools/CMakeLists.txt | 4 +
llvm/tools/llvm-gpu-loader/CMakeLists.txt | 46 ++++++++
.../tools/llvm-gpu-loader/amdhsa.cpp | 14 +--
.../tools/llvm-gpu-loader/llvm-gpu-loader.cpp | 73 +++++++-----
llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h | 110 ++++++++++++++++++
.../tools/llvm-gpu-loader/nvptx.cpp | 9 +-
.../tools/llvm-gpu-loader/server.h | 102 ++--------------
15 files changed, 228 insertions(+), 236 deletions(-)
delete mode 100644 libc/utils/gpu/CMakeLists.txt
delete mode 100644 libc/utils/gpu/loader/CMakeLists.txt
delete mode 100644 libc/utils/gpu/loader/amdgpu/CMakeLists.txt
delete mode 100644 libc/utils/gpu/loader/nvptx/CMakeLists.txt
create mode 100644 llvm/tools/llvm-gpu-loader/CMakeLists.txt
rename libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp => llvm/tools/llvm-gpu-loader/amdhsa.cpp (98%)
rename libc/utils/gpu/loader/Main.cpp => llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp (69%)
create mode 100644 llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h
rename libc/utils/gpu/loader/nvptx/nvptx-loader.cpp => llvm/tools/llvm-gpu-loader/nvptx.cpp (98%)
rename libc/utils/gpu/loader/Loader.h => llvm/tools/llvm-gpu-loader/server.h (52%)
diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt
index ad39ff6fbcb1e..b264dcb4974c7 100644
--- a/libc/CMakeLists.txt
+++ b/libc/CMakeLists.txt
@@ -59,13 +59,6 @@ set(LIBC_NAMESPACE ${default_namespace}
CACHE STRING "The namespace to use to enclose internal implementations. Must start with '__llvm_libc'."
)
-# We will build the GPU utilities if we are not doing a runtimes build.
-option(LIBC_BUILD_GPU_LOADER "Always build the GPU loader utilities" OFF)
-if(LIBC_BUILD_GPU_LOADER OR ((NOT LLVM_RUNTIMES_BUILD) AND LLVM_LIBC_GPU_BUILD))
- add_subdirectory(utils/gpu)
- return()
-endif()
-
option(LIBC_CMAKE_VERBOSE_LOGGING
"Log details warnings and notifications during CMake configuration." OFF)
diff --git a/libc/src/__support/RPC/rpc_server.h b/libc/src/__support/RPC/rpc_server.h
index 7387eba9ceb26..dc3d8030caa47 100644
--- a/libc/src/__support/RPC/rpc_server.h
+++ b/libc/src/__support/RPC/rpc_server.h
@@ -20,6 +20,11 @@
#define __has_builtin(x) 0
#endif
+// Only stub the builtin out when __has_builtin reports it missing (GCC < 10).
+#if !__has_builtin(__builtin_is_constant_evaluated)
+#define __builtin_is_constant_evaluated() 0
+#endif
+
// Configs for using the LLVM libc writer interface.
#define LIBC_COPT_USE_C_ASSERT
#define LIBC_COPT_MEMCPY_USE_EMBEDDED_TINY
@@ -28,7 +33,7 @@
#define LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
#define LIBC_COPT_PRINTF_DISABLE_STRERROR
-// The 'long double' type is 8 byte
+// The 'long double' type is 8 bytes.
#define LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64
#include "shared/rpc.h"
diff --git a/libc/utils/gpu/CMakeLists.txt b/libc/utils/gpu/CMakeLists.txt
deleted file mode 100644
index e529646a1206e..0000000000000
--- a/libc/utils/gpu/CMakeLists.txt
+++ /dev/null
@@ -1 +0,0 @@
-add_subdirectory(loader)
diff --git a/libc/utils/gpu/loader/CMakeLists.txt b/libc/utils/gpu/loader/CMakeLists.txt
deleted file mode 100644
index 9b3bd009dc0f1..0000000000000
--- a/libc/utils/gpu/loader/CMakeLists.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-add_library(gpu_loader OBJECT Main.cpp)
-
-include(FindLibcCommonUtils)
-target_link_libraries(gpu_loader PUBLIC llvm-libc-common-utilities)
-
-target_include_directories(gpu_loader PUBLIC
- ${CMAKE_CURRENT_SOURCE_DIR}
- ${LIBC_SOURCE_DIR}/include
- ${LIBC_SOURCE_DIR}
- ${LLVM_MAIN_INCLUDE_DIR}
- ${LLVM_BINARY_DIR}/include
-)
-if(NOT LLVM_ENABLE_RTTI)
- target_compile_options(gpu_loader PUBLIC -fno-rtti)
-endif()
-
-find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
-if(hsa-runtime64_FOUND)
- add_subdirectory(amdgpu)
-endif()
-
-# The CUDA loader requires LLVM to traverse the ELF image for symbols.
-find_package(CUDAToolkit 11.2 QUIET)
-if(CUDAToolkit_FOUND)
- add_subdirectory(nvptx)
-endif()
-
-if(TARGET amdhsa-loader AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
- add_custom_target(libc.utils.gpu.loader)
- add_dependencies(libc.utils.gpu.loader amdhsa-loader)
- set_target_properties(
- libc.utils.gpu.loader
- PROPERTIES
- TARGET amdhsa-loader
- EXECUTABLE "$<TARGET_FILE:amdhsa-loader>"
- )
-elseif(TARGET nvptx-loader AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
- add_custom_target(libc.utils.gpu.loader)
- add_dependencies(libc.utils.gpu.loader nvptx-loader)
- set_target_properties(
- libc.utils.gpu.loader
- PROPERTIES
- TARGET nvptx-loader
- EXECUTABLE "$<TARGET_FILE:nvptx-loader>"
- )
-endif()
-
-foreach(gpu_loader_tgt amdhsa-loader nvptx-loader)
- if(TARGET ${gpu_loader_tgt})
- install(TARGETS ${gpu_loader_tgt}
- DESTINATION ${CMAKE_INSTALL_BINDIR}
- COMPONENT libc)
- endif()
-endforeach()
diff --git a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
deleted file mode 100644
index 17878daf0b6fe..0000000000000
--- a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-set(LLVM_LINK_COMPONENTS
- BinaryFormat
- Object
- Option
- Support
- FrontendOffloading
- )
-
-add_llvm_executable(amdhsa-loader amdhsa-loader.cpp)
-target_link_libraries(amdhsa-loader PRIVATE hsa-runtime64::hsa-runtime64 gpu_loader)
diff --git a/libc/utils/gpu/loader/nvptx/CMakeLists.txt b/libc/utils/gpu/loader/nvptx/CMakeLists.txt
deleted file mode 100644
index 42510ac31dad4..0000000000000
--- a/libc/utils/gpu/loader/nvptx/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-set(LLVM_LINK_COMPONENTS
- BinaryFormat
- Object
- Option
- Support
- )
-
-add_llvm_executable(nvptx-loader nvptx-loader.cpp)
-target_link_libraries(nvptx-loader PRIVATE gpu_loader CUDA::cuda_driver)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 5639061bea206..e76bc9b9ab778 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -210,10 +210,6 @@ if("${LIBC_TARGET_TRIPLE}" STREQUAL "amdgcn-amd-amdhsa" OR
"${LIBC_TARGET_TRIPLE}" STREQUAL "nvptx64-nvidia-cuda")
set(LLVM_LIBC_GPU_BUILD ON)
endif()
-if (NOT "libc" IN_LIST LLVM_ENABLE_PROJECTS AND LLVM_LIBC_GPU_BUILD)
- message(STATUS "Enabling libc project to build libc testing tools")
- list(APPEND LLVM_ENABLE_PROJECTS "libc")
-endif()
# LLVM_ENABLE_PROJECTS_USED is `ON` if the user has ever used the
# `LLVM_ENABLE_PROJECTS` CMake cache variable. This exists for
diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index 136099dc48ab8..51433d1ec9831 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -534,20 +534,6 @@ if(build_runtimes)
endif()
if(LLVM_LIBC_GPU_BUILD)
list(APPEND extra_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON")
- if("libc" IN_LIST RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES)
- if(TARGET amdhsa-loader)
- list(APPEND extra_cmake_args
- "-DRUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:amdhsa-loader>")
- list(APPEND extra_deps amdhsa-loader)
- endif()
- endif()
- if("libc" IN_LIST RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES)
- if(TARGET nvptx-loader)
- list(APPEND extra_cmake_args
- "-DRUNTIMES_nvptx64-nvidia-cuda_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:nvptx-loader>")
- list(APPEND extra_deps nvptx-loader)
- endif()
- endif()
if(TARGET clang-offload-packager)
list(APPEND extra_deps clang-offload-packager)
endif()
diff --git a/llvm/tools/CMakeLists.txt b/llvm/tools/CMakeLists.txt
index b9c5a79849ec8..9fe6f8c6b9c21 100644
--- a/llvm/tools/CMakeLists.txt
+++ b/llvm/tools/CMakeLists.txt
@@ -9,6 +9,10 @@
# traversing each directory.
create_llvm_tool_options()
+if(NOT LLVM_COMPILER_IS_GCC_COMPATIBLE)
+ set(LLVM_TOOL_LLVM_GPU_LOADER_BUILD OFF) # Not supported with e.g. MSVC.
+endif()
+
if(NOT LLVM_BUILD_LLVM_DYLIB AND NOT LLVM_BUILD_LLVM_C_DYLIB)
set(LLVM_TOOL_LLVM_SHLIB_BUILD Off)
endif()
diff --git a/llvm/tools/llvm-gpu-loader/CMakeLists.txt b/llvm/tools/llvm-gpu-loader/CMakeLists.txt
new file mode 100644
index 0000000000000..b35a702476ada
--- /dev/null
+++ b/llvm/tools/llvm-gpu-loader/CMakeLists.txt
@@ -0,0 +1,46 @@
+set(LLVM_LINK_COMPONENTS
+ BinaryFormat
+ Object
+ Option
+ Support
+ FrontendOffloading
+ TargetParser
+)
+
+add_llvm_tool(llvm-gpu-loader
+ llvm-gpu-loader.cpp
+
+ # TODO: The backend sources are added conditionally below because the GPU
+ # runtimes are linked statically. Dynamically load the dependencies,
+ # possibly using the LLVM offloading API when it is complete.
+ PARTIAL_SOURCES_INTENDED
+
+ DEPENDS
+ intrinsics_gen
+)
+
+# Link the libc common utilities, which supply the RPC server interface.
+include(FindLibcCommonUtils)
+target_link_libraries(llvm-gpu-loader PUBLIC llvm-libc-common-utilities)
+
+# AMD GPUs: build amdhsa.cpp and define AMDHSA_SUPPORT when HSA is found.
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if(hsa-runtime64_FOUND)
+ target_sources(llvm-gpu-loader PRIVATE amdhsa.cpp)
+ target_compile_definitions(llvm-gpu-loader PRIVATE AMDHSA_SUPPORT)
+ target_link_libraries(llvm-gpu-loader PRIVATE hsa-runtime64::hsa-runtime64)
+
+ # Keep the old amdhsa-loader name available as a symlink to this tool.
+ add_llvm_tool_symlink(amdhsa-loader llvm-gpu-loader)
+endif()
+
+# NVIDIA GPUs: build nvptx.cpp and define NVPTX_SUPPORT when CUDA is found.
+find_package(CUDAToolkit 11.2 QUIET)
+if(CUDAToolkit_FOUND)
+ target_sources(llvm-gpu-loader PRIVATE nvptx.cpp)
+ target_compile_definitions(llvm-gpu-loader PRIVATE NVPTX_SUPPORT)
+ target_link_libraries(llvm-gpu-loader PRIVATE CUDA::cuda_driver)
+
+ # Keep the old nvptx-loader name available as a symlink to this tool.
+ add_llvm_tool_symlink(nvptx-loader llvm-gpu-loader)
+endif()
diff --git a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp b/llvm/tools/llvm-gpu-loader/amdhsa.cpp
similarity index 98%
rename from libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp
rename to llvm/tools/llvm-gpu-loader/amdhsa.cpp
index 00fde147b0abd..be1b6b7993920 100644
--- a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp
+++ b/llvm/tools/llvm-gpu-loader/amdhsa.cpp
@@ -13,7 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Loader.h"
+#include "llvm-gpu-loader.h"
+#include "server.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
@@ -260,9 +261,8 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
// Register RPC callbacks for the malloc and free functions on HSA.
auto malloc_handler = [&](size_t size) -> void * {
void *dev_ptr = nullptr;
- if (hsa_status_t err =
- hsa_amd_memory_pool_allocate(coarsegrained_pool, size,
- /*flags=*/0, &dev_ptr))
+ if (hsa_amd_memory_pool_allocate(coarsegrained_pool, size,
+ /*flags=*/0, &dev_ptr))
dev_ptr = nullptr;
hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr);
return dev_ptr;
@@ -330,9 +330,9 @@ static hsa_status_t hsa_memcpy(void *dst, hsa_agent_t dst_agent,
return HSA_STATUS_SUCCESS;
}
-int load(int argc, const char **argv, const char **envp, void *image,
- size_t size, const LaunchParameters &params,
- bool print_resource_usage) {
+int load_amdhsa(int argc, const char **argv, const char **envp, void *image,
+ size_t size, const LaunchParameters &params,
+ bool print_resource_usage) {
// Initialize the HSA runtime used to communicate with the device.
if (hsa_status_t err = hsa_init())
handle_error(err);
diff --git a/libc/utils/gpu/loader/Main.cpp b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp
similarity index 69%
rename from libc/utils/gpu/loader/Main.cpp
rename to llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp
index c3aeeffd56368..a8204664e85eb 100644
--- a/libc/utils/gpu/loader/Main.cpp
+++ b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp
@@ -6,14 +6,17 @@
//
//===----------------------------------------------------------------------===//
//
-// This file opens a device image passed on the command line and passes it to
-// one of the loader implementations for launch.
+// This utility is used to launch standard programs onto the GPU in conjunction
+// with the LLVM 'libc' project. It is designed to mimic a standard emulator
+// workflow, allowing for unit tests to be run on the GPU directly.
//
//===----------------------------------------------------------------------===//
-#include "Loader.h"
+#include "llvm-gpu-loader.h"
#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
@@ -21,6 +24,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/WithColor.h"
+#include "llvm/TargetParser/Triple.h"
#include <cerrno>
#include <cstdio>
@@ -67,12 +71,6 @@ static cl::opt<bool>
cl::desc("Output resource usage of launched kernels"),
cl::init(false), cl::cat(loader_category));
-static cl::opt<bool>
- no_parallelism("no-parallelism",
- cl::desc("Allows only a single process to use the GPU at a "
- "time. Useful to suppress out-of-resource errors"),
- cl::init(false), cl::cat(loader_category));
-
static cl::opt<std::string> file(cl::Positional, cl::Required,
cl::desc("<gpu executable>"),
cl::cat(loader_category));
@@ -115,27 +113,42 @@ int main(int argc, const char **argv, const char **envp) {
llvm::transform(args, std::back_inserter(new_argv),
[](const std::string &arg) { return arg.c_str(); });
- // Claim a file lock on the executable so only a single process can enter this
- // region if requested. This prevents the loader from spurious failures.
- int fd = -1;
- if (no_parallelism) {
- fd = open(get_main_executable(argv[0]).c_str(), O_RDONLY);
- if (flock(fd, LOCK_EX) == -1)
- report_error(createStringError("Failed to lock '%s': %s", argv[0],
- strerror(errno)));
- }
-
- // Drop the loader from the program arguments.
- LaunchParameters params{threads_x, threads_y, threads_z,
- blocks_x, blocks_y, blocks_z};
- int ret = load(new_argv.size(), new_argv.data(), envp,
- const_cast<char *>(image.getBufferStart()),
- image.getBufferSize(), params, print_resource_usage);
-
- if (no_parallelism) {
- if (flock(fd, LOCK_UN) == -1)
- report_error(createStringError("Failed to unlock '%s': %s", argv[0],
- strerror(errno)));
+ Expected<llvm::object::ELF64LEObjectFile> elf_or_err =
+ llvm::object::ELF64LEObjectFile::create(image);
+ if (!elf_or_err)
+ report_error(elf_or_err.takeError());
+
+ int ret = 1;
+ if (elf_or_err->getArch() == Triple::amdgcn) {
+#ifdef AMDHSA_SUPPORT
+ LaunchParameters params{threads_x, threads_y, threads_z,
+ blocks_x, blocks_y, blocks_z};
+
+ ret = load_amdhsa(new_argv.size(), new_argv.data(), envp,
+ const_cast<char *>(image.getBufferStart()),
+ image.getBufferSize(), params, print_resource_usage);
+#else
+ report_error(createStringError(
+ "Unsupported architecture; %s",
+ Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
+#endif
+ } else if (elf_or_err->getArch() == Triple::nvptx64) {
+#ifdef NVPTX_SUPPORT
+ LaunchParameters params{threads_x, threads_y, threads_z,
+ blocks_x, blocks_y, blocks_z};
+
+ ret = load_nvptx(new_argv.size(), new_argv.data(), envp,
+ const_cast<char *>(image.getBufferStart()),
+ image.getBufferSize(), params, print_resource_usage);
+#else
+ report_error(createStringError(
+ "Unsupported architecture; %s",
+ Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
+#endif
+ } else {
+ report_error(createStringError(
+ "Unsupported architecture; %s",
+ Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
}
return ret;
diff --git a/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h
new file mode 100644
index 0000000000000..ed34d0bace978
--- /dev/null
+++ b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h
@@ -0,0 +1,110 @@
+//===-- Generic device loader interface -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_GPU_LOADER_LLVM_GPU_LOADER_H
+#define LLVM_TOOLS_LLVM_GPU_LOADER_LLVM_GPU_LOADER_H
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+/// Generic launch parameters for configuring the number of blocks / threads.
+struct LaunchParameters {
+ uint32_t num_threads_x;
+ uint32_t num_threads_y;
+ uint32_t num_threads_z;
+ uint32_t num_blocks_x;
+ uint32_t num_blocks_y;
+ uint32_t num_blocks_z;
+};
+
+/// The arguments to the '_begin' kernel.
+struct begin_args_t {
+ int argc;
+ void *argv; // NOTE(review): presumably a device-side copy — confirm.
+ void *envp; // NOTE(review): presumably a device-side copy — confirm.
+};
+
+/// The arguments to the '_start' kernel.
+struct start_args_t {
+ int argc;
+ void *argv;
+ void *envp;
+ void *ret; // NOTE(review): presumably receives main's result — confirm.
+};
+
+/// The arguments to the '_end' kernel.
+struct end_args_t {
+ int argc; // NOTE(review): meaning for '_end' is not visible here — confirm.
+};
+
+/// Load the \p image and launch execution of the _start kernel on the target
+/// device. Copies \p argc and \p argv to the device. Returns the final value of
+/// `main` on the device. Each backend is declared only if it was built.
+#ifdef AMDHSA_SUPPORT
+int load_amdhsa(int argc, const char **argv, const char **envp, void *image,
+ size_t size, const LaunchParameters &params,
+ bool print_resource_usage);
+#endif
+#ifdef NVPTX_SUPPORT
+int load_nvptx(int argc, const char **argv, const char **envp, void *image,
+ size_t size, const LaunchParameters &params,
+ bool print_resource_usage);
+#endif
+
+/// Return \p val rounded up to a multiple of \p align (both cast to \p V).
+template <typename V, typename A> inline V align_up(V val, A align) {
+ return ((val + V(align) - 1) / V(align)) * V(align);
+}
+
+/// Copy \p argv into GPU memory from \p alloc; returns nullptr if alloc fails.
+template <typename Allocator>
+void *copy_argument_vector(int argc, const char **argv, Allocator alloc) {
+ size_t argv_size = sizeof(char *) * (argc + 1);
+ size_t str_size = 0;
+ for (int i = 0; i < argc; ++i)
+ str_size += strlen(argv[i]) + 1;
+
+ // One allocation holds the null-terminated pointer array and all strings.
+ void *dev_argv = alloc(argv_size + str_size);
+ if (!dev_argv)
+ return nullptr;
+
+ // Store the strings linearly, directly after the pointer array.
+ void *dev_str = reinterpret_cast<uint8_t *>(dev_argv) + argv_size;
+ for (int i = 0; i < argc; ++i) {
+ size_t size = strlen(argv[i]) + 1;
+ std::memcpy(dev_str, argv[i], size);
+ static_cast<void **>(dev_argv)[i] = dev_str;
+ dev_str = reinterpret_cast<uint8_t *>(dev_str) + size;
+ }
+
+ // Ensure the vector is null terminated.
+ reinterpret_cast<void **>(dev_argv)[argc] = nullptr;
+ return dev_argv;
+}
+
+/// Copy the null-terminated \p envp to GPU memory allocated using \p alloc.
+template <typename Allocator>
+void *copy_environment(const char **envp, Allocator alloc) {
+ int envc = 0;
+ for (const char **env = envp; *env != 0; ++env) // Stop at the null entry.
+ ++envc;
+
+ return copy_argument_vector(envc, envp, alloc);
+}
+
+inline void handle_error_impl(const char *file, int32_t line, const char *msg) {
+ fprintf(stderr, "%s:%d:0: Error: %s\n", file, line, msg);
+ exit(EXIT_FAILURE); // Never returns to the caller.
+}
+#define handle_error(X) handle_error_impl(__FILE__, __LINE__, X)
+
+#endif // LLVM_TOOLS_LLVM_GPU_LOADER_LLVM_GPU_LOADER_H
diff --git a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp b/llvm/tools/llvm-gpu-loader/nvptx.cpp
similarity index 98%
rename from libc/utils/gpu/loader/nvptx/nvptx-loader.cpp
rename to llvm/tools/llvm-gpu-loader/nvptx.cpp
index 7d6c176c6f360..13c62d50e6077 100644
--- a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp
+++ b/llvm/tools/llvm-gpu-loader/nvptx.cpp
@@ -13,7 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Loader.h"
+#include "llvm-gpu-loader.h"
+#include "server.h"
#include "cuda.h"
@@ -236,9 +237,9 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, rpc::Server &server,
return CUDA_SUCCESS;
}
-int load(int argc, const char **argv, const char **envp, void *image,
- size_t size, const LaunchParameters &params,
- bool print_resource_usage) {
+int load_nvptx(int argc, const char **argv, const char **envp, void *image,
+ size_t size, const LaunchParameters &params,
+ bool print_resource_usage) {
if (CUresult err = cuInit(0))
handle_error(err);
// Obtain the first device found on the system.
diff --git a/libc/utils/gpu/loader/Loader.h b/llvm/tools/llvm-gpu-loader/server.h
similarity index 52%
rename from libc/utils/gpu/loader/Loader.h
rename to llvm/tools/llvm-gpu-loader/server.h
index ec05117a041ab..bc54b4b74915a 100644
--- a/libc/utils/gpu/loader/Loader.h
+++ b/llvm/tools/llvm-gpu-loader/server.h
@@ -1,4 +1,4 @@
-//===-- Generic device loader interface -----------------------------------===//
+//===-- Common RPC server handler -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
-#define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
+#ifndef LLVM_TOOLS_LLVM_GPU_LOADER_SERVER_H
+#define LLVM_TOOLS_LLVM_GPU_LOADER_SERVER_H
+
+#include <cstddef>
+#include <cstdint>
#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
@@ -15,97 +18,6 @@
#include "shared/rpc_opcodes.h"
#include "shared/rpc_server.h"
-#include <cstddef>
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-
-/// Generic launch parameters for configuration the number of blocks / threads.
-struct LaunchParameters {
- uint32_t num_threads_x;
- uint32_t num_threads_y;
- uint32_t num_threads_z;
- uint32_t num_blocks_x;
- uint32_t num_blocks_y;
- uint32_t num_blocks_z;
-};
-
-/// The arguments to the '_begin' kernel.
-struct begin_args_t {
- int argc;
- void *argv;
- void *envp;
-};
-
-/// The arguments to the '_start' kernel.
-struct start_args_t {
- int argc;
- void *argv;
- void *envp;
- void *ret;
-};
-
-/// The arguments to the '_end' kernel.
-struct end_args_t {
- int argc;
-};
-
-/// Generic interface to load the \p image and launch execution of the _start
-/// kernel on the target device. Copies \p argc and \p argv to the device.
-/// Returns the final value of the `main` function on the device.
-int load(int argc, const char **argv, const char **evnp, void *image,
- size_t size, const LaunchParameters &params,
- bool print_resource_usage);
-
-/// Return \p V aligned "upwards" according to \p Align.
-template <typename V, typename A> inline V align_up(V val, A align) {
- return ((val + V(align) - 1) / V(align)) * V(align);
-}
-
-/// Copy the system's argument vector to GPU memory allocated using \p alloc.
-template <typename Allocator>
-void *copy_argument_vector(int argc, const char **argv, Allocator alloc) {
- size_t argv_size = sizeof(char *) * (argc + 1);
- size_t str_size = 0;
- for (int i = 0; i < argc; ++i)
- str_size += strlen(argv[i]) + 1;
-
- // We allocate enough space for a null terminated array and all the strings.
- void *dev_argv = alloc(argv_size + str_size);
- if (!dev_argv)
- return nullptr;
-
- // Store the strings linerally in the same memory buffer.
- void *dev_str = reinterpret_cast<uint8_t *>(dev_argv) + argv_size;
- for (int i = 0; i < argc; ++i) {
- size_t size = strlen(argv[i]) + 1;
- std::memcpy(dev_str, argv[i], size);
- static_cast<void **>(dev_argv)[i] = dev_str;
- dev_str = reinterpret_cast<uint8_t *>(dev_str) + size;
- }
-
- // Ensure the vector is null terminated.
- reinterpret_cast<void **>(dev_argv)[argc] = nullptr;
- return dev_argv;
-}
-
-/// Copy the system's environment to GPU memory allocated using \p alloc.
-template <typename Allocator>
-void *copy_environment(const char **envp, Allocator alloc) {
- int envc = 0;
- for (const char **env = envp; *env != 0; ++env)
- ++envc;
-
- return copy_argument_vector(envc, envp, alloc);
-}
-
-inline void handle_error_impl(const char *file, int32_t line, const char *msg) {
- fprintf(stderr, "%s:%d:0: Error: %s\n", file, line, msg);
- exit(EXIT_FAILURE);
-}
-#define handle_error(X) handle_error_impl(__FILE__, __LINE__, X)
-
template <uint32_t num_lanes, typename Alloc, typename Free>
inline uint32_t handle_server(rpc::Server &server, uint32_t index,
Alloc &&alloc, Free &&free) {
@@ -195,4 +107,4 @@ inline uint32_t handle_server(rpc::Server &server, uint32_t index,
return index;
}
-#endif
+#endif // LLVM_TOOLS_LLVM_GPU_LOADER_SERVER_H
More information about the llvm-commits
mailing list