[llvm-branch-commits] [llvm] 4c9b7ff - [LLVM] Introduce 'llvm-offload-wrapper' tool (#153504)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 19 09:48:56 PDT 2025
Author: Joseph Huber
Date: 2025-08-19T11:05:48-05:00
New Revision: 4c9b7ff04cb4376379a483fcbd2af780a62473d6
URL: https://github.com/llvm/llvm-project/commit/4c9b7ff04cb4376379a483fcbd2af780a62473d6
DIFF: https://github.com/llvm/llvm-project/commit/4c9b7ff04cb4376379a483fcbd2af780a62473d6.diff
LOG: [LLVM] Introduce 'llvm-offload-wrapper' tool (#153504)
Summary:
This is a standalone tool that does the wrapper stage of the
`clang-linker-wrapper`. We want this to be an external tool because
currently there's no easy way to split apart what the
clang-linker-wrapper is doing under the hood. With this tool, users can
manually extract files with `clang-offload-packager`, feed them through
`clang --target=<triple>` and then use this tool to generate a `.bc`
file they can give to the linker. The goal here is to make reproducing
the linker wrapper steps easier.
Added:
llvm/test/Other/offload-wrapper.ll
llvm/tools/llvm-offload-wrapper/CMakeLists.txt
llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp
Modified:
clang/docs/ClangLinkerWrapper.rst
llvm/test/CMakeLists.txt
Removed:
################################################################################
diff --git a/clang/docs/ClangLinkerWrapper.rst b/clang/docs/ClangLinkerWrapper.rst
index e69cdba434c93..eb38d2b8fb5ee 100644
--- a/clang/docs/ClangLinkerWrapper.rst
+++ b/clang/docs/ClangLinkerWrapper.rst
@@ -60,6 +60,10 @@ only for the linker wrapper will be forwarded to the wrapped linker job.
--v Display the version number and exit
-- The separator for the wrapped linker arguments
+The linker wrapper will generate the appropriate runtime calls to register the
+generated device binary with the offloading runtime. To do this step manually we
+provide the ``llvm-offload-wrapper`` utility.
+
Relocatable Linking
===================
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index b46f4829605a1..f6333d68a8ea5 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -118,6 +118,7 @@ set(LLVM_TEST_DEPENDS
llvm-objdump
llvm-opt-fuzzer
llvm-opt-report
+ llvm-offload-wrapper
llvm-otool
llvm-pdbutil
llvm-profdata
diff --git a/llvm/test/Other/offload-wrapper.ll b/llvm/test/Other/offload-wrapper.ll
new file mode 100644
index 0000000000000..9107a141ad201
--- /dev/null
+++ b/llvm/test/Other/offload-wrapper.ll
@@ -0,0 +1,52 @@
+; Exercises llvm-offload-wrapper for the hip and cuda offload kinds. The tool
+; is given this very file as the image to wrap, the bitcode it writes is
+; disassembled with llvm-dis, and the registration globals and functions in
+; the resulting module are matched below.
+; NOTE(review): the triple is spelled x86-64 rather than x86_64 -- confirm
+; that this spelling is intended.
+; RUN: llvm-offload-wrapper --triple=x86-64 -kind=hip %s -o %t.bc
+; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP
+
+; HIP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
+; HIP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
+; HIP-NEXT: @.fatbin_image = internal constant {{.*}}, section ".hip_fatbin"
+; HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
+; HIP-NEXT: @.hip.binary_handle = internal global ptr null
+; HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
+
+; HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
+; HIP-NEXT: entry:
+; HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
+; HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
+; HIP-NEXT: call void @.hip.globals_reg(ptr %0)
+; HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
+; HIP-NEXT: ret void
+; HIP-NEXT: }
+
+; HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
+; HIP-NEXT: entry:
+; HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
+; HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
+; HIP-NEXT: ret void
+; HIP-NEXT: }
+
+; Same sequence for the cuda kind. Compared with the hip expectations above,
+; the registration function additionally calls __cudaRegisterFatBinaryEnd
+; after registering the globals.
+; RUN: llvm-offload-wrapper --triple=x86-64 -kind=cuda %s -o %t.bc
+; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA
+
+; CUDA: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
+; CUDA-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
+; CUDA-NEXT: @.fatbin_image = internal constant {{.*}}, section ".nv_fatbin"
+; CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8
+; CUDA-NEXT: @.cuda.binary_handle = internal global ptr null
+; CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }]
+
+; CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" {
+; CUDA-NEXT: entry:
+; CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper)
+; CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8
+; CUDA-NEXT: call void @.cuda.globals_reg(ptr %0)
+; CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0)
+; CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
+; CUDA-NEXT: ret void
+; CUDA-NEXT: }
+
+; CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
+; CUDA-NEXT: entry:
+; CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8
+; CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0)
+; CUDA-NEXT: ret void
+; CUDA-NEXT: }
diff --git a/llvm/tools/llvm-offload-wrapper/CMakeLists.txt b/llvm/tools/llvm-offload-wrapper/CMakeLists.txt
new file mode 100644
index 0000000000000..2e2cdb53b5b41
--- /dev/null
+++ b/llvm/tools/llvm-offload-wrapper/CMakeLists.txt
@@ -0,0 +1,15 @@
+# LLVM components linked into the llvm-offload-wrapper tool.
+# NOTE(review): llvm-offload-wrapper.cpp includes no llvm/Option header (it
+# parses arguments with Support/CommandLine.h) -- confirm `Option` is needed.
+# NOTE(review): the tool creates an LLVMContext and a Module but the IR
+# component is not listed; presumably it is pulled in transitively -- confirm.
+set(LLVM_LINK_COMPONENTS
+ BitWriter
+ Object
+ Option
+ FrontendOffloading
+ Support
+ TargetParser
+ )
+
+# Build the tool from its single source file.
+add_llvm_tool(llvm-offload-wrapper
+ llvm-offload-wrapper.cpp
+
+ DEPENDS
+ intrinsics_gen
+ )
diff --git a/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp
new file mode 100644
index 0000000000000..9dac1646b1e26
--- /dev/null
+++ b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp
@@ -0,0 +1,135 @@
+//===- llvm-offload-wrapper: Create runtime registration code for devices -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Provides a utility for generating runtime registration code for device code.
+// We take a binary image (CUDA fatbinary, HIP offload bundle, LLVM binary) and
+// create a new IR module that calls the respective runtime to load it on the
+// device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Frontend/Offloading/OffloadWrapper.h"
+#include "llvm/Frontend/Offloading/Utility.h"
+#include "llvm/Object/OffloadBinary.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/TargetParser/Host.h"
+
+using namespace llvm;
+
+// `-h`: hidden flag; when set, main() prints the help message and exits.
+static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+// Category shared by every option of this tool; main() hides all options that
+// are not part of it.
+// NOTE(review): "Wrapepr" in the identifier looks like a typo for "Wrapper";
+// a rename has to touch every use of the name in this file.
+static cl::OptionCategory
+ OffloadWrapeprCategory("llvm-offload-wrapper options");
+
+// `-kind=<openmp|cuda|hip>`: required; selects which offloading runtime the
+// generated registration code targets.
+static cl::opt<object::OffloadKind> Kind(
+ "kind", cl::desc("Wrap for offload kind:"), cl::cat(OffloadWrapeprCategory),
+ cl::Required,
+ cl::values(clEnumValN(object::OFK_OpenMP, "openmp", "Wrap OpenMP binaries"),
+ clEnumValN(object::OFK_Cuda, "cuda", "Wrap CUDA binaries"),
+ clEnumValN(object::OFK_HIP, "hip", "Wrap HIP binaries")));
+
+// `-o <file>`: path the wrapper module's bitcode is written to. No default
+// value is set here.
+static cl::opt<std::string> OutputFile("o", cl::desc("Write output to <file>."),
+ cl::value_desc("file"),
+ cl::cat(OffloadWrapeprCategory));
+
+// Positional arguments: one or more device image files to wrap.
+static cl::list<std::string> InputFiles(cl::Positional,
+ cl::desc("Wrap input from <file>"),
+ cl::value_desc("file"), cl::OneOrMore,
+ cl::cat(OffloadWrapeprCategory));
+
+// `--triple=<triple>`: target triple for the wrapper module; initialised to
+// the value returned by sys::getDefaultTargetTriple().
+static cl::opt<std::string>
+ TheTriple("triple", cl::desc("Target triple for the wrapper module"),
+ cl::init(sys::getDefaultTargetTriple()),
+ cl::cat(OffloadWrapeprCategory));
+
+/// Wraps the device images in \p BuffersToWrap into a fresh IR module and
+/// writes that module as bitcode to the file named by the `-o` option.
+///
+/// The registration code is generated by the offloading wrapper that matches
+/// the `-kind` option. CUDA and HIP take a single fatbinary / offload bundle,
+/// so more than one image is rejected for those kinds; OpenMP forwards every
+/// image. \p BuffersToWrap must not be empty (main() guarantees this through
+/// cl::OneOrMore on the positional inputs).
+///
+/// \returns Error::success() once the bitcode has been handed to the output
+/// stream; otherwise an error for an unsupported kind or image count, a
+/// failure reported by the wrapper, or a failure to open the output file.
+static Error wrapImages(ArrayRef<ArrayRef<char>> BuffersToWrap) {
+  if (BuffersToWrap.size() > 1 &&
+      (Kind == llvm::object::OFK_Cuda || Kind == llvm::object::OFK_HIP))
+    return createStringError(
+        "CUDA / HIP offloading uses a single fatbinary or offload bundle");
+
+  LLVMContext Context;
+  Module M("offload.wrapper.module", Context);
+  // Honor the `--triple` option. A default-constructed Triple would silently
+  // drop the user's choice, and the wrappers consult the module triple when
+  // emitting the offload entry array.
+  M.setTargetTriple(Triple(TheTriple));
+
+  switch (Kind) {
+  case llvm::object::OFK_OpenMP:
+    if (Error Err = offloading::wrapOpenMPBinaries(
+            M, BuffersToWrap, offloading::getOffloadEntryArray(M),
+            /*Suffix=*/"", /*Relocatable=*/false))
+      return Err;
+    break;
+  case llvm::object::OFK_Cuda:
+    if (Error Err = offloading::wrapCudaBinary(
+            M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M),
+            /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
+      return Err;
+    break;
+  case llvm::object::OFK_HIP:
+    if (Error Err = offloading::wrapHIPBinary(
+            M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M)))
+      return Err;
+    break;
+  default:
+    return createStringError(getOffloadKindName(Kind) +
+                             " wrapping is not supported");
+  }
+
+  int FD = -1;
+  // Attach the output path to the error so the user can tell which file could
+  // not be opened, mirroring how main() reports unreadable inputs.
+  if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD))
+    return createFileError(OutputFile, EC);
+  // The stream takes ownership of the descriptor and closes it on destruction.
+  llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true);
+  WriteBitcodeToFile(M, OS);
+
+  return Error::success();
+}
+
+/// Tool entry point: parses the command line, loads every input image into
+/// memory, and hands the images to wrapImages(). Any failure is printed with
+/// the tool name as prefix and terminates the process with EXIT_FAILURE.
+int main(int argc, char **argv) {
+  InitLLVM X(argc, argv);
+  cl::HideUnrelatedOptions(OffloadWrapeprCategory);
+  cl::ParseCommandLineOptions(
+      argc, argv,
+      "Generate runtime registration code for a device binary image\n");
+
+  if (Help) {
+    cl::PrintHelpMessage();
+    return EXIT_SUCCESS;
+  }
+
+  // Logs the error and exits; never returns to the caller.
+  const char *ToolName = argv[0];
+  auto Fail = [ToolName](Error Failure) {
+    logAllUnhandledErrors(std::move(Failure),
+                          WithColor::error(errs(), ToolName));
+    exit(EXIT_FAILURE);
+  };
+
+  // OwnedInputs keeps the file contents alive while Images refers to them.
+  SmallVector<std::unique_ptr<MemoryBuffer>> OwnedInputs;
+  SmallVector<ArrayRef<char>> Images;
+  for (StringRef Path : InputFiles) {
+    ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+        MemoryBuffer::getFileOrSTDIN(Path);
+    if (std::error_code ReadEC = FileOrErr.getError())
+      Fail(createFileError(Path, ReadEC));
+
+    OwnedInputs.push_back(std::move(*FileOrErr));
+    const MemoryBuffer &Loaded = *OwnedInputs.back();
+    Images.emplace_back(Loaded.getBufferStart(), Loaded.getBufferSize());
+  }
+
+  if (Error WrapErr = wrapImages(Images))
+    Fail(std::move(WrapErr));
+
+  return EXIT_SUCCESS;
+}
More information about the llvm-branch-commits
mailing list