[llvm-branch-commits] [llvm] [Offload] Add `olLinkProgram` (PR #148648)
Ross Brunton via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jul 14 08:13:35 PDT 2025
https://github.com/RossBrunton created https://github.com/llvm/llvm-project/pull/148648
A version of `olCreateProgram` that inputs many bitcode files and links
them together before loading them.
>From 8589fcc6d053cb2937cf970d1ce354abfb84da31 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Mon, 14 Jul 2025 16:05:41 +0100
Subject: [PATCH] [Offload] Add `olLinkProgram`
A version of `olCreateProgram` that inputs many bitcode files and links
them together before loading them.
---
offload/liboffload/API/Program.td | 28 ++++++
offload/liboffload/src/OffloadImpl.cpp | 33 +++++++
offload/plugins-nextgen/common/include/JIT.h | 4 +
.../common/include/PluginInterface.h | 4 +
offload/plugins-nextgen/common/src/JIT.cpp | 41 ++++++++
.../common/src/PluginInterface.cpp | 7 ++
offload/unittests/OffloadAPI/CMakeLists.txt | 3 +-
.../OffloadAPI/device_code/CMakeLists.txt | 4 +
.../unittests/OffloadAPI/device_code/link_a.c | 11 +++
.../unittests/OffloadAPI/device_code/link_b.c | 10 ++
.../OffloadAPI/program/olLinkProgram.cpp | 99 +++++++++++++++++++
11 files changed, 243 insertions(+), 1 deletion(-)
create mode 100644 offload/unittests/OffloadAPI/device_code/link_a.c
create mode 100644 offload/unittests/OffloadAPI/device_code/link_b.c
create mode 100644 offload/unittests/OffloadAPI/program/olLinkProgram.cpp
diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
index 0476fa1f7c27a..3dae37f288ff7 100644
--- a/offload/liboffload/API/Program.td
+++ b/offload/liboffload/API/Program.td
@@ -25,6 +25,34 @@ def : Function {
let returns = [];
}
+// One input image for `olLinkProgram`, described as a base address plus a
+// size in bytes. `olLinkProgram` takes an array of these.
+def : Struct {
+  let name = "ol_program_link_buffer_t";
+  let desc = "An image to link with `olLinkProgram`.";
+  let members = [
+    StructMember<"void *", "Address", "base address of memory image">,
+    StructMember<"size_t", "Size", "size in bytes of memory image">,
+  ];
+}
+
+// API entry point that links several bitcode images for one device and
+// returns a single program handle. The `returns` list below enumerates the
+// documented failure codes: empty input, non-bitcode input, and devices that
+// cannot link more than one image.
+def : Function {
+  let name = "olLinkProgram";
+  let desc = "Compile and link multiple bitcode images into a single binary.";
+  let details = [
+    "No caching is performed; multiple calls to `olLinkProgram` with the same images will result in multiple linking operations",
+  ];
+  let params = [
+    Param<"ol_device_handle_t", "Device", "handle of the device to link for", PARAM_IN>,
+    Param<"ol_program_link_buffer_t *", "Images", "a pointer to an array of `ImagesSize` entries, one for each image to link", PARAM_IN>,
+    Param<"size_t", "ImagesSize", "the number of elements in `Images`", PARAM_IN>,
+    Param<"ol_program_handle_t*", "Program", "output handle for the created program", PARAM_OUT>
+  ];
+  let returns = [
+    Return<"OL_ERRC_INVALID_SIZE", ["`ImagesSize == 0`"]>,
+    Return<"OL_ERRC_INVALID_BINARY", ["Any image is not in the bitcode format"]>,
+    Return<"OL_ERRC_UNSUPPORTED", ["Linking is not supported for this device and `ImagesSize` > 1"]>,
+  ];
+}
+
def : Function {
let name = "olDestroyProgram";
let desc = "Destroy the program and free all underlying resources.";
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 17a2b00cb7140..14af015460c8c 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -650,6 +650,39 @@ Error olCreateProgram_impl(ol_device_handle_t Device, const void *ProgData,
return Error::success();
}
+/// Link the \p ImagesSize bitcode buffers in \p Images for \p Device, load the
+/// resulting binary onto the device, and return the new program through
+/// \p Program.
+///
+/// Any error from the link step or from loading is propagated to the caller.
+/// If loading fails, the partially constructed program is destroyed before
+/// returning and \p Program is left untouched.
+Error olLinkProgram_impl(ol_device_impl_t *Device,
+                         ol_program_link_buffer_t *Images, size_t ImagesSize,
+                         ol_program_handle_t *Program) {
+  // Describe every caller-provided buffer as a [start, end) device image.
+  // The number of images is known up front, so allocate once.
+  std::vector<__tgt_device_image> DevImages;
+  DevImages.reserve(ImagesSize);
+  for (size_t I = 0; I < ImagesSize; I++) {
+    const auto &ProgData = Images[I];
+    DevImages.push_back({ProgData.Address,
+                         utils::advancePtr(ProgData.Address, ProgData.Size),
+                         nullptr, nullptr});
+  }
+
+  auto LinkResult =
+      Device->Device->jitLinkBinary(Device->Device->Plugin, DevImages);
+  if (!LinkResult)
+    return LinkResult.takeError();
+
+  // The program does not own an image buffer of its own (second argument is
+  // null); it only records the linked image descriptor returned above.
+  ol_program_handle_t Prog =
+      new ol_program_impl_t(nullptr, nullptr, *LinkResult);
+
+  auto Res =
+      Device->Device->loadBinary(Device->Device->Plugin, &Prog->DeviceImage);
+  if (!Res) {
+    delete Prog;
+    return Res.takeError();
+  }
+  assert(*Res != nullptr && "loadBinary returned nullptr");
+
+  Prog->Image = *Res;
+  *Program = Prog;
+
+  return Error::success();
+}
+
Error olDestroyProgram_impl(ol_program_handle_t Program) {
auto &Device = Program->Image->getDevice();
if (auto Err = Device.unloadBinary(Program->Image))
diff --git a/offload/plugins-nextgen/common/include/JIT.h b/offload/plugins-nextgen/common/include/JIT.h
index 1d6280a0af141..08b82c4aefb8d 100644
--- a/offload/plugins-nextgen/common/include/JIT.h
+++ b/offload/plugins-nextgen/common/include/JIT.h
@@ -55,6 +55,10 @@ struct JITEngine {
process(const __tgt_device_image &Image,
target::plugin::GenericDeviceTy &Device);
+  /// Link and compile multiple bitcode images into a single binary
+  ///
+  /// Every entry of \p Images must be a bitcode image; otherwise an
+  /// INVALID_BINARY error is returned. The returned descriptor points into a
+  /// buffer retained by this engine rather than into the input images.
+  Expected<__tgt_device_image> link(std::vector<__tgt_device_image> &Images,
+                                    target::plugin::GenericDeviceTy &Device);
+
private:
/// Compile the bitcode image \p Image and generate the binary image that can
/// be loaded to the target device of the triple \p Triple architecture \p
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 7824257d28e1f..79e021cc64f3b 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -749,6 +749,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Load the binary image into the device and return the target table.
Expected<DeviceImageTy *> loadBinary(GenericPluginTy &Plugin,
const __tgt_device_image *TgtImage);
+  /// Link and compile multiple bitcode images into a single image.
+  ///
+  /// The work is delegated to the JIT engine owned by \p Plugin, with this
+  /// device as the link target. The result is not loaded onto the device;
+  /// callers pass it to loadBinary separately.
+  Expected<__tgt_device_image>
+  jitLinkBinary(GenericPluginTy &Plugin,
+                std::vector<__tgt_device_image> InputImages);
virtual Expected<DeviceImageTy *>
loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0;
diff --git a/offload/plugins-nextgen/common/src/JIT.cpp b/offload/plugins-nextgen/common/src/JIT.cpp
index 835dcc0da2ec9..2cf6ddbfdff0b 100644
--- a/offload/plugins-nextgen/common/src/JIT.cpp
+++ b/offload/plugins-nextgen/common/src/JIT.cpp
@@ -327,3 +327,44 @@ JITEngine::process(const __tgt_device_image &Image,
return &Image;
}
+
+/// Compile each bitcode image in \p Images to an object for \p Device's
+/// compute unit kind, hand all objects to the device's JIT post-processing
+/// step, and return a descriptor for the resulting binary.
+///
+/// Returns an INVALID_BINARY error naming the index of the first image that
+/// is not bitcode, or forwards any error from object creation or
+/// post-processing. The produced buffer is stored in the per-compute-unit
+/// JITImages list, so the returned start/end pointers refer to storage held
+/// by this engine.
+Expected<__tgt_device_image>
+JITEngine::link(std::vector<__tgt_device_image> &Images,
+                target::plugin::GenericDeviceTy &Device) {
+  const std::string &ComputeUnitKind = Device.getComputeUnitKind();
+
+  PostProcessingFn PostProcessing =
+      [&Device](llvm::SmallVector<std::unique_ptr<MemoryBuffer>> &&MB)
+      -> Expected<std::unique_ptr<MemoryBuffer>> {
+    return Device.doJITPostProcessing(std::move(MB));
+  };
+
+  // Take the lock before touching ComputeUnitMap: the subscript lookup may
+  // insert a new entry, so it must not race with other users of the map.
+  std::lock_guard<std::mutex> Lock(ComputeUnitMapMutex);
+  ComputeUnitInfo &CUI = ComputeUnitMap[ComputeUnitKind];
+
+  llvm::SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
+  Buffers.reserve(Images.size());
+  size_t Index = 0;
+  for (auto &I : Images) {
+    // Index is a size_t, so it must be formatted with %zu rather than %i.
+    if (!isImageBitcode(I))
+      return error::createOffloadError(
+          error::ErrorCode::INVALID_BINARY,
+          "binary %zu provided to link operation is not bitcode", Index);
+    Index++;
+
+    auto ObjMBOrErr = getOrCreateObjFile(I, CUI.Context, ComputeUnitKind);
+    if (!ObjMBOrErr)
+      return ObjMBOrErr.takeError();
+    Buffers.push_back(std::move(*ObjMBOrErr));
+  }
+
+  auto ImageMBOrErr = PostProcessing(std::move(Buffers));
+  if (!ImageMBOrErr)
+    return ImageMBOrErr.takeError();
+
+  // Keep the linked binary alive in the engine; the descriptor returned below
+  // only points into this buffer.
+  auto &ImageMB = CUI.JITImages.emplace_back(std::move(*ImageMBOrErr));
+  __tgt_device_image JITedImage{};
+  JITedImage.ImageStart = const_cast<char *>(ImageMB->getBufferStart());
+  JITedImage.ImageEnd = const_cast<char *>(ImageMB->getBufferEnd());
+
+  return JITedImage;
+}
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 81b9d423e13d8..9e2234dcc148b 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -903,6 +903,13 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
return deinitImpl();
}
+
+/// Hand \p InputImages to the JIT engine of \p Plugin, linking them for this
+/// device, and return whatever image (or error) the engine produces.
+Expected<__tgt_device_image>
+GenericDeviceTy::jitLinkBinary(GenericPluginTy &Plugin,
+                               std::vector<__tgt_device_image> InputImages) {
+  auto &&Engine = Plugin.getJIT();
+  return Engine.link(InputImages, *this);
+}
+
Expected<DeviceImageTy *>
GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
const __tgt_device_image *InputTgtImage) {
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index d76338612210d..e13ded4f8b1aa 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -32,7 +32,8 @@ add_offload_unittest("platform"
add_offload_unittest("program"
program/olCreateProgram.cpp
- program/olDestroyProgram.cpp)
+ program/olDestroyProgram.cpp
+ program/olLinkProgram.cpp)
add_offload_unittest("queue"
queue/olCreateQueue.cpp
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index 11c8ccbd6c7c5..c3e07724086fe 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -8,6 +8,8 @@ add_offload_test_device_code(localmem_static.c localmem_static)
add_offload_test_device_code(global.c global)
add_offload_test_device_code(global_ctor.c global_ctor)
add_offload_test_device_code(global_dtor.c global_dtor)
+add_offload_test_device_code(link_a.c link_a)
+add_offload_test_device_code(link_b.c link_b)
add_custom_target(offload_device_binaries DEPENDS
foo.bin
@@ -19,5 +21,7 @@ add_custom_target(offload_device_binaries DEPENDS
global.bin
global_ctor.bin
global_dtor.bin
+ link_a.bin
+ link_b.bin
)
set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
diff --git a/offload/unittests/OffloadAPI/device_code/link_a.c b/offload/unittests/OffloadAPI/device_code/link_a.c
new file mode 100644
index 0000000000000..7feb92189c018
--- /dev/null
+++ b/offload/unittests/OffloadAPI/device_code/link_a.c
@@ -0,0 +1,11 @@
+#include <gpuintrin.h>
+#include <stdint.h>
+
+// Defined here and also referenced from link_b.c, where funky() stores to it.
+uint32_t global;
+
+// Implemented in link_b.c; only resolvable once both images are linked.
+extern uint32_t funky();
+
+// Kernel used by the olLinkProgram test: writes funky()'s return value to
+// out[0], then the value of `global` (as left by that call) to out[1].
+__gpu_kernel void link_a(uint32_t *out) {
+  out[0] = funky();
+  out[1] = global;
+}
diff --git a/offload/unittests/OffloadAPI/device_code/link_b.c b/offload/unittests/OffloadAPI/device_code/link_b.c
new file mode 100644
index 0000000000000..82f41fd8a0218
--- /dev/null
+++ b/offload/unittests/OffloadAPI/device_code/link_b.c
@@ -0,0 +1,10 @@
+#include <gpuintrin.h>
+#include <stdint.h>
+
+// Defined in link_a.c as a single `uint32_t`; this declaration must use the
+// same type, since declaring it as an array here would be an incompatible
+// declaration of the same object.
+extern uint32_t global;
+
+// Called from the `link_a` kernel in link_a.c. Stores 100 into `global` and
+// returns 200 so the test can observe both the cross-image call and the
+// cross-image global access. Default visibility is presumably needed so the
+// symbol survives until the images are linked together.
+[[gnu::visibility("default")]]
+uint32_t funky() {
+  global = 100;
+  return 200;
+}
diff --git a/offload/unittests/OffloadAPI/program/olLinkProgram.cpp b/offload/unittests/OffloadAPI/program/olLinkProgram.cpp
new file mode 100644
index 0000000000000..122d0156e6e0c
--- /dev/null
+++ b/offload/unittests/OffloadAPI/program/olLinkProgram.cpp
@@ -0,0 +1,99 @@
+//===------- Offload API tests - olLinkProgram ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olLinkProgramTest = OffloadQueueTest;
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLinkProgramTest);
+
+// Linking a single bitcode image must succeed and yield a valid program.
+TEST_P(olLinkProgramTest, SuccessSingle) {
+  std::unique_ptr<llvm::MemoryBuffer> DeviceBin;
+  ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin));
+  // The size is unsigned, so a >= 0 check can never fail; require a non-empty
+  // binary instead.
+  ASSERT_GT(DeviceBin->getBufferSize(), 0lu);
+
+  ol_program_link_buffer_t Buffers[1] = {
+      {const_cast<char *>(DeviceBin->getBufferStart()),
+       DeviceBin->getBufferSize()},
+  };
+
+  ol_program_handle_t Program;
+  ASSERT_SUCCESS(olLinkProgram(Device, Buffers, 1, &Program));
+  ASSERT_NE(Program, nullptr);
+
+  ASSERT_SUCCESS(olDestroyProgram(Program));
+}
+
+// Links link_a and link_b into one program, launches the `link_a` kernel and
+// checks that both the cross-image call and the shared global were resolved.
+// Skipped when the device reports that linking is unsupported.
+TEST_P(olLinkProgramTest, SuccessBuild) {
+  std::unique_ptr<llvm::MemoryBuffer> ABin;
+  ASSERT_TRUE(TestEnvironment::loadDeviceBinary("link_a", Device, ABin));
+  std::unique_ptr<llvm::MemoryBuffer> BBin;
+  ASSERT_TRUE(TestEnvironment::loadDeviceBinary("link_b", Device, BBin));
+
+  ol_program_link_buffer_t Buffers[2] = {
+      {const_cast<char *>(ABin->getBufferStart()), ABin->getBufferSize()},
+      {const_cast<char *>(BBin->getBufferStart()), BBin->getBufferSize()},
+  };
+
+  ol_program_handle_t Program;
+  auto LinkResult = olLinkProgram(Device, Buffers, 2, &Program);
+  if (LinkResult && LinkResult->Code == OL_ERRC_UNSUPPORTED)
+    GTEST_SKIP() << "Linking unsupported: " << LinkResult->Details;
+  ASSERT_SUCCESS(LinkResult);
+  ASSERT_NE(Program, nullptr);
+
+  ol_symbol_handle_t Kernel;
+  ASSERT_SUCCESS(
+      olGetSymbol(Program, "link_a", OL_SYMBOL_KIND_KERNEL, &Kernel));
+
+  // Two uint32_t slots: out[0] and out[1] written by the kernel.
+  void *Mem;
+  ASSERT_SUCCESS(
+      olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, 2 * sizeof(uint32_t), &Mem));
+  struct {
+    void *Mem;
+  } Args{Mem};
+  ol_kernel_launch_size_args_t LaunchArgs{};
+  LaunchArgs.GroupSize = {1, 1, 1};
+  LaunchArgs.NumGroups = {1, 1, 1};
+  LaunchArgs.Dimensions = 1;
+
+  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
+                                &LaunchArgs, nullptr));
+  ASSERT_SUCCESS(olWaitQueue(Queue));
+
+  // Unsigned literals keep the comparisons free of signed/unsigned mismatch.
+  uint32_t *Data = static_cast<uint32_t *>(Mem);
+  ASSERT_EQ(Data[0], 200u);
+  ASSERT_EQ(Data[1], 100u);
+
+  ASSERT_SUCCESS(olMemFree(Mem));
+  ASSERT_SUCCESS(olDestroyProgram(Program));
+}
+
+// An input that starts with the ELF magic instead of bitcode must be rejected
+// with OL_ERRC_INVALID_BINARY.
+TEST_P(olLinkProgramTest, InvalidNotBitcode) {
+  char NotBitcode[] =
+      "\177ELF0000000000000000000000000000000000000000000000000000"
+      "00000000000000000000000000000000000000000000000000000000000";
+
+  ol_program_link_buffer_t Input{NotBitcode, sizeof(NotBitcode)};
+
+  ol_program_handle_t Linked;
+  ASSERT_ERROR(OL_ERRC_INVALID_BINARY,
+               olLinkProgram(Device, &Input, 1, &Linked));
+}
+
+// Passing an image count of zero must be rejected with OL_ERRC_INVALID_SIZE.
+TEST_P(olLinkProgramTest, InvalidSize) {
+  // Zero-length arrays are not standard C++, so use a one-element array to
+  // get a valid pointer and still pass a count of 0.
+  ol_program_link_buffer_t Buffers[1] = {};
+
+  ol_program_handle_t Program;
+  ASSERT_ERROR(OL_ERRC_INVALID_SIZE,
+               olLinkProgram(Device, Buffers, 0, &Program));
+}
More information about the llvm-branch-commits
mailing list