[libc] [llvm] [LLVM] Port 'llvm-gpu-loader' to use LLVMOffload (PR #162739)
Kevin Sala Penades via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 2 00:08:34 PDT 2025
================
@@ -35,121 +35,255 @@
using namespace llvm;
-static cl::OptionCategory loader_category("loader options");
+static cl::OptionCategory LoaderCategory("loader options");
-static cl::opt<bool> help("h", cl::desc("Alias for -help"), cl::Hidden,
- cl::cat(loader_category));
+static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden,
+ cl::cat(LoaderCategory));
static cl::opt<unsigned>
- threads_x("threads-x", cl::desc("Number of threads in the 'x' dimension"),
- cl::init(1), cl::cat(loader_category));
+ ThreadsX("threads-x", cl::desc("Number of threads in the 'x' dimension"),
+ cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
- threads_y("threads-y", cl::desc("Number of threads in the 'y' dimension"),
- cl::init(1), cl::cat(loader_category));
+ ThreadsY("threads-y", cl::desc("Number of threads in the 'y' dimension"),
+ cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
- threads_z("threads-z", cl::desc("Number of threads in the 'z' dimension"),
- cl::init(1), cl::cat(loader_category));
-static cl::alias threads("threads", cl::aliasopt(threads_x),
+ ThreadsZ("threads-z", cl::desc("Number of threads in the 'z' dimension"),
+ cl::init(1), cl::cat(LoaderCategory));
+static cl::alias threads("threads", cl::aliasopt(ThreadsX),
cl::desc("Alias for --threads-x"),
- cl::cat(loader_category));
+ cl::cat(LoaderCategory));
static cl::opt<unsigned>
- blocks_x("blocks-x", cl::desc("Number of blocks in the 'x' dimension"),
- cl::init(1), cl::cat(loader_category));
+ BlocksX("blocks-x", cl::desc("Number of blocks in the 'x' dimension"),
+ cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
- blocks_y("blocks-y", cl::desc("Number of blocks in the 'y' dimension"),
- cl::init(1), cl::cat(loader_category));
+ BlocksY("blocks-y", cl::desc("Number of blocks in the 'y' dimension"),
+ cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
- blocks_z("blocks-z", cl::desc("Number of blocks in the 'z' dimension"),
- cl::init(1), cl::cat(loader_category));
-static cl::alias blocks("blocks", cl::aliasopt(blocks_x),
+ BlocksZ("blocks-z", cl::desc("Number of blocks in the 'z' dimension"),
+ cl::init(1), cl::cat(LoaderCategory));
+static cl::alias Blocks("blocks", cl::aliasopt(BlocksX),
cl::desc("Alias for --blocks-x"),
- cl::cat(loader_category));
+ cl::cat(LoaderCategory));
-static cl::opt<bool>
- print_resource_usage("print-resource-usage",
- cl::desc("Output resource usage of launched kernels"),
- cl::init(false), cl::cat(loader_category));
-
-static cl::opt<std::string> file(cl::Positional, cl::Required,
+static cl::opt<std::string> File(cl::Positional, cl::Required,
cl::desc("<gpu executable>"),
- cl::cat(loader_category));
-static cl::list<std::string> args(cl::ConsumeAfter,
+ cl::cat(LoaderCategory));
+static cl::list<std::string> Args(cl::ConsumeAfter,
cl::desc("<program arguments>..."),
- cl::cat(loader_category));
+ cl::cat(LoaderCategory));
+
+// The arguments to the '_begin' kernel.
+struct BeginArgs {
+ int Argc;
+ void *Argv;
+ void *Envp;
+};
+
+// The arguments to the '_start' kernel.
+struct StartArgs {
+ int Argc;
+ void *Argv;
+ void *Envp;
+ void *Ret;
+};
-[[noreturn]] void report_error(Error E) {
+// The arguments to the '_end' kernel.
+struct EndArgs {};
+
+[[noreturn]] static void handleError(Error E) {
outs().flush();
logAllUnhandledErrors(std::move(E), WithColor::error(errs(), "loader"));
exit(EXIT_FAILURE);
}
-std::string get_main_executable(const char *name) {
- void *ptr = (void *)(intptr_t)&get_main_executable;
- auto cow_path = sys::fs::getMainExecutable(name, ptr);
- return sys::path::parent_path(cow_path).str();
+[[noreturn]] static void handleError(ol_result_t Err, unsigned Line) {
+ fprintf(stderr, "%s:%d %s\n", __FILE__, Line, Err->Details);
+ exit(EXIT_FAILURE);
+}
+
+#define OFFLOAD_ERR(X) \
+ if (ol_result_t Err = X) \
+ handleError(Err, __LINE__);
+
+static void *copyArgumentVector(int Argc, const char **Argv,
+ ol_device_handle_t Device) {
+ size_t ArgSize = sizeof(char *) * (Argc + 1);
+ size_t StringLen = 0;
+ for (int i = 0; i < Argc; ++i)
+ StringLen += strlen(Argv[i]) + 1;
+
+ // We allocate enough space for a null terminated array and all the strings.
+ void *DevArgv;
+ OFFLOAD_ERR(
+ olMemAlloc(Device, OL_ALLOC_TYPE_HOST, ArgSize + StringLen, &DevArgv));
+ if (!DevArgv)
+ handleError(
+ createStringError("Failed to allocate memory for environment."));
+
+ // Store the strings linearly in the same memory buffer.
+ void *DevString = reinterpret_cast<uint8_t *>(DevArgv) + ArgSize;
+ for (int i = 0; i < Argc; ++i) {
+ size_t size = strlen(Argv[i]) + 1;
+ std::memcpy(DevString, Argv[i], size);
+ static_cast<void **>(DevArgv)[i] = DevString;
+ DevString = reinterpret_cast<uint8_t *>(DevString) + size;
+ }
+
+ // Ensure the vector is null terminated.
+ reinterpret_cast<void **>(DevArgv)[Argc] = nullptr;
+ return DevArgv;
+}
+
+void *copyEnvironment(const char **Envp, ol_device_handle_t Device) {
+ int Envc = 0;
+ for (const char **Env = Envp; *Env != 0; ++Env)
+ ++Envc;
+
+ return copyArgumentVector(Envc, Envp, Device);
+}
+
+ol_device_handle_t findDevice(MemoryBufferRef Binary) {
+ ol_device_handle_t Device;
+ std::tuple Data = std::make_tuple(&Device, &Binary);
+ OFFLOAD_ERR(olIterateDevices(
+ [](ol_device_handle_t Device, void *UserData) {
+ auto &[Output, Binary] = *reinterpret_cast<decltype(Data) *>(UserData);
+ bool IsValid = false;
+ OFFLOAD_ERR(olIsValidBinary(Device, Binary->getBufferStart(),
+ Binary->getBufferSize(), &IsValid));
+ if (!IsValid)
+ return true;
+
+ *Output = Device;
+ return false;
+ },
+ &Data));
+ return Device;
+}
+
+ol_device_handle_t getHostDevice() {
+ ol_device_handle_t Device;
+ OFFLOAD_ERR(olIterateDevices(
+ [](ol_device_handle_t Device, void *UserData) {
+ ol_platform_handle_t Platform;
+ olGetDeviceInfo(Device, OL_DEVICE_INFO_PLATFORM, sizeof(Platform),
+ &Platform);
+ ol_platform_backend_t Backend;
+ olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND, sizeof(Backend),
+ &Backend);
+
+ auto &Output = *reinterpret_cast<decltype(Device) *>(UserData);
+ if (Backend == OL_PLATFORM_BACKEND_HOST) {
+ Output = Device;
+ return false;
+ }
+ return true;
+ },
+ &Device));
+ return Device;
+}
+
+ol_program_handle_t loadBinary(std::vector<char> &Binary,
+ std::vector<ol_device_handle_t> &Devices) {
+ for (ol_device_handle_t &Device : Devices) {
+ bool IsValid = false;
+ OFFLOAD_ERR(
+ olIsValidBinary(Device, Binary.data(), Binary.size(), &IsValid));
+ if (!IsValid)
+ continue;
+
+ ol_program_handle_t Program;
+ OFFLOAD_ERR(
+ olCreateProgram(Device, Binary.data(), Binary.size(), &Program));
+ return Program;
+ }
+ handleError(
+ createStringError("No valid device found for '%s'", File.c_str()));
+}
+
+template <typename Args>
+void launchKernel(ol_queue_handle_t Queue, ol_device_handle_t Device,
+ ol_program_handle_t Program, const char *Name,
+ ol_kernel_launch_size_args_t LaunchArgs, Args KernelArgs) {
----------------
kevinsala wrote:
The `Args KernelArgs` parameter is taken by value, so the kernel arguments are copied. Is this the desired behavior?
https://github.com/llvm/llvm-project/pull/162739
More information about the llvm-commits mailing list