[clang] [llvm] [HIP][MacOS] Mach-O support and Darwin toolchain fixes (PR #183991)
Paulius Velesko via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 3 03:57:20 PDT 2026
https://github.com/pvelesko updated https://github.com/llvm/llvm-project/pull/183991
>From 45ce6a73db8983607a3a62119c98d7db3c5e5292 Mon Sep 17 00:00:00 2001
From: Paulius Velesko <pvelesko at pglc.io>
Date: Sun, 1 Mar 2026 10:59:45 +0200
Subject: [PATCH 1/8] [Darwin] MacOS combatability with HIPSPV
CGCUDANV, HIPUtility: Use Mach-O segment,section format for HIP on macOS
[Driver][Darwin] Add ensureTargetInitialized() for lazy target init from triple
When Darwin is used as a host toolchain for device offloading (e.g.
HIP/CUDA), TranslateArgs may not run before methods that query the
target platform, leaving TargetInitialized false and triggering asserts.
Add ensureTargetInitialized() which infers the platform and version from
the triple, and call it from ComputeEffectiveClangTriple,
addClangWarningOptions, CheckObjCARC, and getSupportedSanitizers instead
of the previous isTargetInitialized() bail-out guards.
[Driver][Darwin] Skip host-stdlib flags when compiling device code
When DeviceOffloadKind != OFK_None, return early from
Darwin::addClangTargetOptions() after MachO flags. This skips
host-specific flags like -faligned-alloc-unavailable and
-fno-sized-deallocation that are irrelevant to device compilation
and break SPIR-V builds.
Add macOS handling for GPU bin handle section in HIP fat binary
---
clang/lib/CodeGen/CGCUDANV.cpp | 10 +++-
clang/lib/Driver/ToolChains/Darwin.cpp | 67 +++++++++++++++++++++-
clang/lib/Driver/ToolChains/Darwin.h | 6 ++
clang/lib/Driver/ToolChains/HIPUtility.cpp | 13 +++--
4 files changed, 88 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index e04da90b3cbf6..f08040d1d3d15 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -817,10 +817,14 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
llvm::Constant *FatBinStr;
unsigned FatMagic;
if (IsHIP) {
- FatbinConstantName = ".hip_fatbin";
- FatbinSectionName = ".hipFatBinSegment";
+ // On macOS (Mach-O), section names must be in "segment,section" format.
+ FatbinConstantName =
+ CGM.getTriple().isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin";
+ FatbinSectionName =
+ CGM.getTriple().isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment";
- ModuleIDSectionName = "__hip_module_id";
+ ModuleIDSectionName =
+ CGM.getTriple().isMacOSX() ? "__HIP,__module_id" : "__hip_module_id";
ModuleIDPrefix = "__hip_";
if (CudaGpuBinary) {
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index 8b000845fea18..61b779c60b90f 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -1140,6 +1140,50 @@ VersionTuple MachO::getLinkerVersion(const llvm::opt::ArgList &Args) const {
Darwin::~Darwin() {}
+void Darwin::ensureTargetInitialized() const {
+ if (TargetInitialized)
+ return;
+
+ llvm::Triple::OSType OS = getTriple().getOS();
+
+ DarwinPlatformKind Platform;
+ switch (OS) {
+ case llvm::Triple::Darwin:
+ case llvm::Triple::MacOSX:
+ Platform = MacOS;
+ break;
+ case llvm::Triple::IOS:
+ Platform = IPhoneOS;
+ break;
+ case llvm::Triple::TvOS:
+ Platform = TvOS;
+ break;
+ case llvm::Triple::WatchOS:
+ Platform = WatchOS;
+ break;
+ case llvm::Triple::XROS:
+ Platform = XROS;
+ break;
+ case llvm::Triple::DriverKit:
+ Platform = DriverKit;
+ break;
+ default:
+ // Unknown platform; leave uninitialized.
+ return;
+ }
+
+ DarwinEnvironmentKind Environment = NativeEnvironment;
+ if (getTriple().isSimulatorEnvironment())
+ Environment = Simulator;
+ else if (getTriple().isMacCatalystEnvironment())
+ Environment = MacCatalyst;
+
+ VersionTuple OsVer = getTriple().getOSVersion();
+ setTarget(Platform, Environment, OsVer.getMajor(),
+ OsVer.getMinor().value_or(0), OsVer.getSubminor().value_or(0),
+ VersionTuple());
+}
+
AppleMachO::~AppleMachO() {}
MachO::~MachO() {}
@@ -1182,7 +1226,11 @@ std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args,
llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
// If the target isn't initialized (e.g., an unknown Darwin platform, return
- // the default triple).
+ // the default triple). Note: we intentionally do NOT call
+ // ensureTargetInitialized() here because this method is called before
+ // AddDeploymentTarget() in some code paths (e.g. -print-libgcc-file-name),
+ // and lazy init with version 0.0.0 would conflict with the real version
+ // that AddDeploymentTarget() later sets via setTarget().
if (!isTargetInitialized())
return Triple.getTriple();
@@ -1248,6 +1296,11 @@ void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const {
CC1Args.push_back("-Werror=undef-prefix");
// For modern targets, promote certain warnings to errors.
+ // Lazily initialize the target if needed (e.g. when Darwin is used as
+ // a host toolchain for device offloading).
+ ensureTargetInitialized();
+ if (!isTargetInitialized())
+ return;
if (isTargetWatchOSBased() || getTriple().isArch64Bit()) {
// Always enable -Wdeprecated-objc-isa-usage and promote it
// to an error.
@@ -3399,6 +3452,12 @@ void Darwin::addClangTargetOptions(
MachO::addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind);
+ // When compiling device code (e.g. SPIR-V for HIP), skip host-specific
+ // flags like -faligned-alloc-unavailable and -fno-sized-deallocation
+ // that depend on the host OS version and are irrelevant to device code.
+ if (DeviceOffloadKind != Action::OFK_None)
+ return;
+
// Pass "-faligned-alloc-unavailable" only when the user hasn't manually
// enabled or disabled aligned allocations.
if (!DriverArgs.hasArgNoClaim(options::OPT_faligned_allocation,
@@ -3939,6 +3998,9 @@ void Darwin::addStartObjectFileArgs(const ArgList &Args,
}
void Darwin::CheckObjCARC() const {
+ ensureTargetInitialized();
+ if (!isTargetInitialized())
+ return;
if (isTargetIOSBased() || isTargetWatchOSBased() || isTargetXROS() ||
(isTargetMacOSBased() && !isMacosxVersionLT(10, 6)))
return;
@@ -3958,6 +4020,9 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
Res |= SanitizerKind::FuzzerNoLink;
Res |= SanitizerKind::ObjCCast;
+ ensureTargetInitialized();
+ if (!isTargetInitialized())
+ return Res;
// Prior to 10.9, macOS shipped a version of the C++ standard library without
// C++11 support. The same is true of iOS prior to version 5. These OS'es are
// incompatible with -fsanitize=vptr.
diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h
index 75f1dff46bfa9..89177b0455aca 100644
--- a/clang/lib/Driver/ToolChains/Darwin.h
+++ b/clang/lib/Driver/ToolChains/Darwin.h
@@ -391,6 +391,12 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public AppleMachO {
void VerifyTripleForSDK(const llvm::opt::ArgList &Args,
const llvm::Triple Triple) const;
+protected:
+ /// Lazily initialize the target platform from the triple when
+ /// AddDeploymentTarget has not run yet (e.g. when Darwin is used as
+ /// a host toolchain for device offloading).
+ void ensureTargetInitialized() const;
+
public:
Darwin(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp
index 1fcb36cc3a390..3bf0f23409f9f 100644
--- a/clang/lib/Driver/ToolChains/HIPUtility.cpp
+++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp
@@ -409,9 +409,11 @@ void HIP::constructGenerateObjFileFromHIPFatBinary(
ObjStream << "# *** Automatically generated by Clang ***\n";
if (FoundPrimaryGpuBinHandleSymbol) {
// Define the first gpubin handle symbol
- if (HostTriple.isWindowsMSVCEnvironment())
+ if (HostTriple.isWindowsMSVCEnvironment()) {
ObjStream << " .section .hip_gpubin_handle,\"dw\"\n";
- else {
+ } else if (HostTriple.isMacOSX()) {
+ ObjStream << " .section __HIP,__gpubin_handle\n";
+ } else {
ObjStream << " .protected " << PrimaryGpuBinHandleSymbol << "\n";
ObjStream << " .type " << PrimaryGpuBinHandleSymbol << ", at object\n";
ObjStream << " .section .hip_gpubin_handle,\"aw\"\n";
@@ -430,9 +432,12 @@ void HIP::constructGenerateObjFileFromHIPFatBinary(
}
if (FoundPrimaryHipFatbinSymbol) {
// Define the first fatbin symbol
- if (HostTriple.isWindowsMSVCEnvironment())
+ if (HostTriple.isWindowsMSVCEnvironment()) {
ObjStream << " .section .hip_fatbin,\"dw\"\n";
- else {
+ } else if (HostTriple.isMacOSX()) {
+ // Mach-O requires "segment,section" format
+ ObjStream << " .section __HIP,__hip_fatbin\n";
+ } else {
ObjStream << " .protected " << PrimaryHipFatbinSymbol << "\n";
ObjStream << " .type " << PrimaryHipFatbinSymbol << ", at object\n";
ObjStream << " .section .hip_fatbin,\"a\", at progbits\n";
>From f35d89db5a01a80b8c2deee988c56f46429b68a5 Mon Sep 17 00:00:00 2001
From: Paulius Velesko <pvelesko at pglc.io>
Date: Wed, 1 Apr 2026 09:11:23 +0300
Subject: [PATCH 2/8] [HIP][Darwin] Add Mach-O tests for offloading entries and
fat binary sections
Add tests that verify Mach-O section naming for HIP offloading entries
and fat binary embedding. These tests expose missing Mach-O support in
emitOffloadingEntry(), getOffloadEntryArray(), and the linker wrapper's
createFatbinDesc() for HIP.
---
clang/test/CodeGenCUDA/device-stub-macho.cu | 28 +++++++++++++++++
clang/test/CodeGenCUDA/offloading-entries.cu | 32 ++++++++++++++++++++
clang/test/Driver/linker-wrapper-image.c | 6 ++++
3 files changed, 66 insertions(+)
create mode 100644 clang/test/CodeGenCUDA/device-stub-macho.cu
diff --git a/clang/test/CodeGenCUDA/device-stub-macho.cu b/clang/test/CodeGenCUDA/device-stub-macho.cu
new file mode 100644
index 0000000000000..d53cefd58bfcd
--- /dev/null
+++ b/clang/test/CodeGenCUDA/device-stub-macho.cu
@@ -0,0 +1,28 @@
+// Verify that HIP fat binary sections use Mach-O "segment,section" format on Darwin.
+
+// RUN: echo -n "GPU binary would be here." > %t
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \
+// RUN: -fcuda-include-gpubinary %t -o - -x hip \
+// RUN: | FileCheck %s --check-prefix=HIPEF
+// RUN: %clang_cc1 -cuid=123 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \
+// RUN: -o - -x hip \
+// RUN: | FileCheck %s --check-prefix=HIPNEF
+
+#include "Inputs/cuda.h"
+
+__device__ int device_var;
+__constant__ int constant_var;
+
+// When fat binary is embedded, section names use Mach-O format.
+// HIPEF: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.",{{.*}}section "__HIP,__hip_fatbin"{{.*}}align 4096
+// HIPEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr }
+// HIPEF-SAME: section "__HIP,__fatbin"
+
+// When fat binary is external (no -fcuda-include-gpubinary), external symbol uses Mach-O section.
+// HIPNEF: @[[FATBIN:__hip_fatbin_[0-9a-f]+]] = external constant i8, section "__HIP,__hip_fatbin"
+// HIPNEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr }
+// HIPNEF-SAME: section "__HIP,__fatbin"
+
+__global__ void kernelfunc(int i, int j, int k) {}
+
+void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu
index fe03cc83b9d21..86ef3bcfa556b 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -11,6 +11,9 @@
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
// RUN: --check-prefix=HIP-COFF %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-macosx10.15.0 -fgpu-rdc \
+// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
+// RUN: --check-prefix=HIP-MACHO %s
#include "Inputs/cuda.h"
@@ -75,6 +78,21 @@
// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries$OE"
//.
+// HIP-MACHO: @managed.managed = global i32 0, align 4
+// HIP-MACHO: @managed = externally_initialized global ptr null
+// HIP-MACHO: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries"
+// HIP-MACHO: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries"
+// HIP-MACHO: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "__LLVM,offload_entries"
+// HIP-MACHO: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "__LLVM,offload_entries"
+// HIP-MACHO: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "__LLVM,offload_entries"
+// HIP-MACHO: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "__LLVM,offload_entries"
+//.
// CUDA-LABEL: @_Z18__device_stub__foov(
// CUDA-NEXT: entry:
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
@@ -103,6 +121,13 @@
// HIP-COFF: setup.end:
// HIP-COFF-NEXT: ret void
//
+// HIP-MACHO-LABEL: @_Z18__device_stub__foov(
+// HIP-MACHO-NEXT: entry:
+// HIP-MACHO-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov)
+// HIP-MACHO-NEXT: br label [[SETUP_END:%.*]]
+// HIP-MACHO: setup.end:
+// HIP-MACHO-NEXT: ret void
+//
__global__ void foo() {}
__device__ int var = 1;
const __device__ int constant = 1;
@@ -137,6 +162,13 @@ __device__ __managed__ int managed = 0;
// HIP-COFF: setup.end:
// HIP-COFF-NEXT: ret void
//
+// HIP-MACHO-LABEL: @_Z21__device_stub__kernelv(
+// HIP-MACHO-NEXT: entry:
+// HIP-MACHO-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv)
+// HIP-MACHO-NEXT: br label [[SETUP_END:%.*]]
+// HIP-MACHO: setup.end:
+// HIP-MACHO-NEXT: ret void
+//
__global__ void kernel() { external = 1; }
surface<void> surf;
diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c
index 2c0df8c6be925..282eaa71ba8da 100644
--- a/clang/test/Driver/linker-wrapper-image.c
+++ b/clang/test/Driver/linker-wrapper-image.c
@@ -162,6 +162,8 @@
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF
+// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-apple-macosx10.15.0 \
+// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-MACHO
// HIP-ELF: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-ELF-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
@@ -170,6 +172,10 @@
// HIP-COFF: @__start_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OA"
// HIP-COFF-NEXT: @__stop_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OZ"
+// HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
+// HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
+// HIP-MACHO-NEXT: @__dummy.__LLVM,offload_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries"
+
// HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
// HIP-NEXT: @.hip.binary_handle = internal global ptr null
>From 94119c9c591969b7e9b5adce3cabdf865ddad0d9 Mon Sep 17 00:00:00 2001
From: Paulius Velesko <pvelesko at pglc.io>
Date: Wed, 1 Apr 2026 09:32:53 +0300
Subject: [PATCH 3/8] [HIP][Darwin] Add Mach-O support for offloading entries,
fat binary sections, and startup functions
- Utility.cpp: emitOffloadingEntry() now emits to "__LLVM,offload_entries"
section on Mach-O instead of the ELF-style "llvm_offload_entries".
- Utility.cpp: getOffloadEntryArray() uses Mach-O section boundary symbols
(\1section$start$/__LLVM$offload_entries) and creates a dummy variable in
the section to trigger linker symbol generation, following the same pattern
as SanitizerCoverage.
- OffloadWrapper.cpp: HIP fat binary sections now use Mach-O segment,section
format ("__HIP,__hip_fatbin", "__HIP,__fatbin").
- OffloadWrapper.cpp: All registration functions use "__TEXT,__StaticInit"
instead of ".text.startup" on Mach-O to avoid invalid section specifier
crashes during code generation.
---
clang/test/Driver/linker-wrapper-image.c | 28 +++++++----
.../llvm/Frontend/Offloading/Utility.h | 9 +++-
.../Frontend/Offloading/OffloadWrapper.cpp | 28 ++++++-----
llvm/lib/Frontend/Offloading/Utility.cpp | 49 ++++++++++++++++---
4 files changed, 85 insertions(+), 29 deletions(-)
diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c
index 282eaa71ba8da..731e2e860ff0b 100644
--- a/clang/test/Driver/linker-wrapper-image.c
+++ b/clang/test/Driver/linker-wrapper-image.c
@@ -174,15 +174,23 @@
// HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
-// HIP-MACHO-NEXT: @__dummy.__LLVM,offload_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries"
+// HIP-MACHO-NEXT: @"__dummy.__LLVM,offload_entries" = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries"
-// HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
-// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
-// HIP-NEXT: @.hip.binary_handle = internal global ptr null
+// HIP-ELF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
+// HIP-ELF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
+// HIP-ELF-NEXT: @.hip.binary_handle = internal global ptr null
+
+// HIP-COFF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
+// HIP-COFF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
+// HIP-COFF-NEXT: @.hip.binary_handle = internal global ptr null
+
+// HIP-MACHO: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section "__HIP,__hip_fatbin"
+// HIP-MACHO-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section "__HIP,__fatbin", align 8
+// HIP-MACHO-NEXT: @.hip.binary_handle = internal global ptr null
// HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
-// HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
+// HIP: define internal void @.hip.fatbin_reg() section "{{\.text\.startup|__TEXT,__StaticInit}}" {
// HIP-NEXT: entry:
// HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
// HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
@@ -191,20 +199,20 @@
// HIP-NEXT: ret void
// HIP-NEXT: }
//
-// HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
+// HIP: define internal void @.hip.fatbin_unreg() section "{{\.text\.startup|__TEXT,__StaticInit}}" {
// HIP-NEXT: entry:
// HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
// HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
// HIP-NEXT: ret void
// HIP-NEXT: }
//
-// HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" {
+// HIP: define internal void @.hip.globals_reg(ptr %0) section "{{\.text\.startup|__TEXT,__StaticInit}}" {
// HIP-NEXT: entry:
-// HIP-NEXT: %1 = icmp ne ptr @__start_llvm_offload_entries, @__stop_llvm_offload_entries
+// HIP-NEXT: %1 = icmp ne ptr @{{.*offload_entries.*}}, @{{.*offload_entries.*}}
// HIP-NEXT: br i1 %1, label %while.entry, label %while.end
//
// HIP: while.entry:
-// HIP-NEXT: %entry1 = phi ptr [ @__start_llvm_offload_entries, %entry ], [ %16, %if.end ]
+// HIP-NEXT: %entry1 = phi ptr [ @{{.*offload_entries.*}}, %entry ], [ %16, %if.end ]
// HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
// HIP-NEXT: %addr = load ptr, ptr %2, align 8
// HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 8
@@ -264,7 +272,7 @@
//
// HIP: if.end:
// HIP-NEXT: %16 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
-// HIP-NEXT: %17 = icmp eq ptr %16, @__stop_llvm_offload_entries
+// HIP-NEXT: %17 = icmp eq ptr %16, @{{.*offload_entries.*}}
// HIP-NEXT: br i1 %17, label %while.end, label %while.entry
//
// HIP: while.end:
diff --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h
index eb08e7ec661e4..e09620ee0a415 100644
--- a/llvm/include/llvm/Frontend/Offloading/Utility.h
+++ b/llvm/include/llvm/Frontend/Offloading/Utility.h
@@ -82,12 +82,17 @@ LLVM_ABI StructType *getEntryTy(Module &M);
/// \param Data Extra data storage associated with the entry.
/// \param SectionName The section this entry will be placed at.
/// \param AuxAddr An extra pointer if needed.
+/// Returns the section name for offloading entries based on the target triple.
+/// ELF: "llvm_offload_entries", COFF: "llvm_offload_entries",
+/// Mach-O: "__LLVM,offload_entries".
+LLVM_ABI StringRef getOffloadEntrySection(Module &M);
+
/// \return The emitted global variable containing the offloading entry.
LLVM_ABI GlobalVariable *
emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr,
StringRef Name, uint64_t Size, uint32_t Flags,
uint64_t Data, Constant *AuxAddr = nullptr,
- StringRef SectionName = "llvm_offload_entries");
+ StringRef SectionName = "");
/// Create a constant struct initializer used to register this global at
/// runtime.
@@ -100,7 +105,7 @@ getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
/// Creates a pair of globals used to iterate the array of offloading entries by
/// accessing the section variables provided by the linker.
LLVM_ABI std::pair<GlobalVariable *, GlobalVariable *>
-getOffloadEntryArray(Module &M, StringRef SectionName = "llvm_offload_entries");
+getOffloadEntryArray(Module &M, StringRef SectionName = "");
namespace amdgpu {
/// Check if an image is compatible with current system's environment. The
diff --git a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
index 83fc4c6e03178..60815e68f7e25 100644
--- a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
+++ b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
@@ -44,6 +44,12 @@ IntegerType *getSizeTTy(Module &M) {
return M.getDataLayout().getIntPtrType(M.getContext());
}
+/// Returns the appropriate startup section for registration functions.
+/// Mach-O uses "__TEXT,__StaticInit"; ELF/COFF use ".text.startup".
+StringRef getStartupSection(const Triple &T) {
+ return T.isOSBinFormatMachO() ? "__TEXT,__StaticInit" : ".text.startup";
+}
+
// struct __tgt_device_image {
// void *ImageStart;
// void *ImageEnd;
@@ -207,7 +213,7 @@ Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
auto *Func =
Function::Create(FuncTy, GlobalValue::InternalLinkage,
".omp_offloading.descriptor_unreg" + Suffix, &M);
- Func->setSection(".text.startup");
+ Func->setSection(getStartupSection(M.getTargetTriple()));
// Get __tgt_unregister_lib function declaration.
auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
@@ -229,7 +235,7 @@ void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
".omp_offloading.descriptor_reg" + Suffix, &M);
- Func->setSection(".text.startup");
+ Func->setSection(getStartupSection(M.getTargetTriple()));
// Get __tgt_register_lib function declaration.
auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
@@ -285,7 +291,7 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
// Create the global string containing the fatbinary.
StringRef FatbinConstantSection =
- IsHIP ? ".hip_fatbin"
+ IsHIP ? (Triple.isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin")
: (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
auto *Data = ConstantDataArray::get(C, Image);
auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
@@ -294,9 +300,9 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
Fatbin->setSection(FatbinConstantSection);
// Create the fatbinary wrapper
- StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment"
- : Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
- : ".nvFatBinSegment";
+ StringRef FatbinWrapperSection =
+ IsHIP ? (Triple.isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment")
+ : (Triple.isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment");
Constant *FatbinWrapper[] = {
ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
ConstantInt::get(Type::getInt32Ty(C), 1),
@@ -403,7 +409,7 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
auto *RegGlobalsFn =
Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage,
IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
- RegGlobalsFn->setSection(".text.startup");
+ RegGlobalsFn->setSection(getStartupSection(M.getTargetTriple()));
// Create the loop to register all the entries.
IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn));
@@ -559,13 +565,13 @@ void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
auto *CtorFunc = Function::Create(
CtorFuncTy, GlobalValue::InternalLinkage,
(IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M);
- CtorFunc->setSection(".text.startup");
+ CtorFunc->setSection(getStartupSection(M.getTargetTriple()));
auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *DtorFunc = Function::Create(
DtorFuncTy, GlobalValue::InternalLinkage,
(IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M);
- DtorFunc->setSection(".text.startup");
+ DtorFunc->setSection(getStartupSection(M.getTargetTriple()));
auto *PtrTy = PointerType::getUnqual(C);
@@ -702,7 +708,7 @@ class SYCLWrapper {
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
Twine("sycl") + ".descriptor_reg", &M);
- Func->setSection(".text.startup");
+ Func->setSection(getStartupSection(M.getTargetTriple()));
// Get RegFuncName function declaration.
FunctionType *RegFuncTy =
@@ -725,7 +731,7 @@ class SYCLWrapper {
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_unreg", &M);
- Func->setSection(".text.startup");
+ Func->setSection(getStartupSection(M.getTargetTriple()));
// Get UnregFuncName function declaration.
FunctionType *UnRegFuncTy =
diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp
index 44cef91bac495..8993919bb4d83 100644
--- a/llvm/lib/Frontend/Offloading/Utility.cpp
+++ b/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -83,6 +83,11 @@ offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
return {EntryInitializer, Str};
}
+StringRef offloading::getOffloadEntrySection(Module &M) {
+ return M.getTargetTriple().isOSBinFormatMachO() ? "__LLVM,offload_entries"
+ : "llvm_offload_entries";
+}
+
GlobalVariable *
offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
Constant *Addr, StringRef Name, uint64_t Size,
@@ -90,6 +95,9 @@ offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
Constant *AuxAddr, StringRef SectionName) {
const llvm::Triple &Triple = M.getTargetTriple();
+ if (SectionName.empty())
+ SectionName = getOffloadEntrySection(M);
+
auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);
@@ -114,6 +122,9 @@ std::pair<GlobalVariable *, GlobalVariable *>
offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
const llvm::Triple &Triple = M.getTargetTriple();
+ if (SectionName.empty())
+ SectionName = getOffloadEntrySection(M);
+
auto *ZeroInitilaizer =
ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr;
@@ -121,13 +132,30 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
: GlobalValue::ExternalLinkage;
- auto *EntriesB =
- new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
- "__start_" + SectionName);
+ // Mach-O uses linker-defined section boundary symbols with a special naming
+ // convention: \1section$start$<segment>$<section> (the \1 prefix suppresses
+ // the leading underscore that Mach-O normally adds to C symbols).
+ // See SanitizerCoverage for the existing precedent:
+ // llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+ // compiler-rt/lib/profile/InstrProfilingPlatformDarwin.c
+ std::string StartName, StopName;
+ if (Triple.isOSBinFormatMachO()) {
+ // Mach-O section name is "segment,section" — convert commas to '$' for
+ // the linker symbol convention.
+ std::string SymSection = SectionName.str();
+ std::replace(SymSection.begin(), SymSection.end(), ',', '$');
+ StartName = "\1section$start$" + SymSection;
+ StopName = "\1section$end$" + SymSection;
+ } else {
+ StartName = ("__start_" + SectionName).str();
+ StopName = ("__stop_" + SectionName).str();
+ }
+
+ auto *EntriesB = new GlobalVariable(M, EntryType, /*isConstant=*/true,
+ Linkage, EntryInit, StartName);
EntriesB->setVisibility(GlobalValue::HiddenVisibility);
- auto *EntriesE =
- new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
- "__stop_" + SectionName);
+ auto *EntriesE = new GlobalVariable(M, EntryType, /*isConstant=*/true,
+ Linkage, EntryInit, StopName);
EntriesE->setVisibility(GlobalValue::HiddenVisibility);
if (Triple.isOSBinFormatELF()) {
@@ -141,6 +169,15 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
DummyEntry->setSection(SectionName);
DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
appendToCompilerUsed(M, DummyEntry);
+ } else if (Triple.isOSBinFormatMachO()) {
+ // Mach-O needs a dummy variable in the section (like ELF) to ensure the
+ // linker provides the section boundary symbols.
+ auto *DummyEntry = new GlobalVariable(
+ M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
+ ZeroInitilaizer, "__dummy." + SectionName);
+ DummyEntry->setSection(SectionName);
+ DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
+ appendToCompilerUsed(M, DummyEntry);
} else {
// The COFF linker will merge sections containing a '$' together into a
// single section. The order of entries in this section will be sorted
>From 43b4eeee5221da7b98018dd918c0497c5f76f099 Mon Sep 17 00:00:00 2001
From: Paulius Velesko <pvelesko at pglc.io>
Date: Wed, 1 Apr 2026 19:04:40 +0300
Subject: [PATCH 4/8] [NFC] Extract getOffloadEntryBoundarySymbols helper in
Utility.cpp
Extract the Mach-O/ELF/COFF section boundary symbol name derivation
into a static helper function, replacing inline logic in
getOffloadEntryArray().
---
llvm/lib/Frontend/Offloading/Utility.cpp | 33 +++++++++++-------------
1 file changed, 15 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp
index 8993919bb4d83..76154b7cc2c56 100644
--- a/llvm/lib/Frontend/Offloading/Utility.cpp
+++ b/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -88,6 +88,19 @@ StringRef offloading::getOffloadEntrySection(Module &M) {
: "llvm_offload_entries";
}
+/// Returns the start/end symbol names for iterating offloading entries in a
+/// given section. Mach-O uses \1section$start$/\1section$end$ convention;
+/// ELF/COFF use __start_/__stop_ prefixes.
+static std::pair<std::string, std::string>
+getOffloadEntryBoundarySymbols(const Triple &T, StringRef SectionName) {
+ if (T.isOSBinFormatMachO()) {
+ std::string SymSection = SectionName.str();
+ std::replace(SymSection.begin(), SymSection.end(), ',', '$');
+ return {"\1section$start$" + SymSection, "\1section$end$" + SymSection};
+ }
+ return {("__start_" + SectionName).str(), ("__stop_" + SectionName).str()};
+}
+
GlobalVariable *
offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
Constant *Addr, StringRef Name, uint64_t Size,
@@ -132,24 +145,8 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
: GlobalValue::ExternalLinkage;
- // Mach-O uses linker-defined section boundary symbols with a special naming
- // convention: \1section$start$<segment>$<section> (the \1 prefix suppresses
- // the leading underscore that Mach-O normally adds to C symbols).
- // See SanitizerCoverage for the existing precedent:
- // llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
- // compiler-rt/lib/profile/InstrProfilingPlatformDarwin.c
- std::string StartName, StopName;
- if (Triple.isOSBinFormatMachO()) {
- // Mach-O section name is "segment,section" — convert commas to '$' for
- // the linker symbol convention.
- std::string SymSection = SectionName.str();
- std::replace(SymSection.begin(), SymSection.end(), ',', '$');
- StartName = "\1section$start$" + SymSection;
- StopName = "\1section$end$" + SymSection;
- } else {
- StartName = ("__start_" + SectionName).str();
- StopName = ("__stop_" + SectionName).str();
- }
+ auto [StartName, StopName] =
+ getOffloadEntryBoundarySymbols(Triple, SectionName);
auto *EntriesB = new GlobalVariable(M, EntryType, /*isConstant=*/true,
Linkage, EntryInit, StartName);
>From 9bb73bb27f07de572866540f98246d0b241bc6fb Mon Sep 17 00:00:00 2001
From: Paulius Velesko <pvelesko at pglc.io>
Date: Wed, 1 Apr 2026 19:28:10 +0300
Subject: [PATCH 5/8] [Offloading] Remove SectionName parameter from
emitOffloadingEntry and getOffloadEntryArray
getOffloadEntrySection() is now the single source of truth for the
offloading entry section name. No caller was passing a custom section
name, so the parameter is removed rather than kept as dead API surface.
---
llvm/include/llvm/Frontend/Offloading/Utility.h | 5 ++---
llvm/lib/Frontend/Offloading/Utility.cpp | 12 ++++--------
2 files changed, 6 insertions(+), 11 deletions(-)
diff --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h
index e09620ee0a415..6a268d0cd0b1f 100644
--- a/llvm/include/llvm/Frontend/Offloading/Utility.h
+++ b/llvm/include/llvm/Frontend/Offloading/Utility.h
@@ -91,8 +91,7 @@ LLVM_ABI StringRef getOffloadEntrySection(Module &M);
LLVM_ABI GlobalVariable *
emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr,
StringRef Name, uint64_t Size, uint32_t Flags,
- uint64_t Data, Constant *AuxAddr = nullptr,
- StringRef SectionName = "");
+ uint64_t Data, Constant *AuxAddr = nullptr);
/// Create a constant struct initializer used to register this global at
/// runtime.
@@ -105,7 +104,7 @@ getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
/// Creates a pair of globals used to iterate the array of offloading entries by
/// accessing the section variables provided by the linker.
LLVM_ABI std::pair<GlobalVariable *, GlobalVariable *>
-getOffloadEntryArray(Module &M, StringRef SectionName = "");
+getOffloadEntryArray(Module &M);
namespace amdgpu {
/// Check if an image is compatible with current system's environment. The
diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp
index 76154b7cc2c56..a43b2e6178491 100644
--- a/llvm/lib/Frontend/Offloading/Utility.cpp
+++ b/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -105,11 +105,9 @@ GlobalVariable *
offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
Constant *Addr, StringRef Name, uint64_t Size,
uint32_t Flags, uint64_t Data,
- Constant *AuxAddr, StringRef SectionName) {
+ Constant *AuxAddr) {
const llvm::Triple &Triple = M.getTargetTriple();
-
- if (SectionName.empty())
- SectionName = getOffloadEntrySection(M);
+ StringRef SectionName = getOffloadEntrySection(M);
auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);
@@ -132,11 +130,9 @@ offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
}
std::pair<GlobalVariable *, GlobalVariable *>
-offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
+offloading::getOffloadEntryArray(Module &M) {
const llvm::Triple &Triple = M.getTargetTriple();
-
- if (SectionName.empty())
- SectionName = getOffloadEntrySection(M);
+ StringRef SectionName = getOffloadEntrySection(M);
auto *ZeroInitilaizer =
ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
>From ce86101a969199a755f33356749edcb26173bedc Mon Sep 17 00:00:00 2001
From: Paulius Velesko <pvelesko at pglc.io>
Date: Thu, 2 Apr 2026 15:47:44 +0300
Subject: [PATCH 6/8] [HIP][Darwin] Add Mach-O test for llvm-offload-wrapper
Add HIP-MACHO run to offload-wrapper.ll that verifies Mach-O section
names, boundary symbols, and startup sections in the wrapping output.
---
llvm/lib/Frontend/Offloading/Utility.cpp | 8 ++----
.../llvm-offload-wrapper/offload-wrapper.ll | 28 +++++++++++++++++++
2 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp
index a43b2e6178491..73048881b66de 100644
--- a/llvm/lib/Frontend/Offloading/Utility.cpp
+++ b/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -101,11 +101,9 @@ getOffloadEntryBoundarySymbols(const Triple &T, StringRef SectionName) {
return {("__start_" + SectionName).str(), ("__stop_" + SectionName).str()};
}
-GlobalVariable *
-offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
- Constant *Addr, StringRef Name, uint64_t Size,
- uint32_t Flags, uint64_t Data,
- Constant *AuxAddr) {
+GlobalVariable *offloading::emitOffloadingEntry(
+ Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name,
+ uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr) {
const llvm::Triple &Triple = M.getTargetTriple();
StringRef SectionName = getOffloadEntrySection(M);
diff --git a/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll b/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll
index 32aad0b6cf64e..b9d2d86eda375 100644
--- a/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll
+++ b/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll
@@ -51,6 +51,34 @@
; HIP-NEXT: ret void
; HIP-NEXT: }
+; RUN: llvm-offload-wrapper --triple=x86_64-apple-macosx10.15.0 -kind=hip %s -o %t.bc
+; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP-MACHO
+
+; HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
+; HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
+; HIP-MACHO-NEXT: @"__dummy.__LLVM,offload_entries" = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries", align 8
+; HIP-MACHO-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @"__dummy.__LLVM,offload_entries"], section "llvm.metadata"
+; HIP-MACHO-NEXT: @.fatbin_image = internal constant {{.*}}, section "__HIP,__hip_fatbin"
+; HIP-MACHO-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section "__HIP,__fatbin", align 8
+; HIP-MACHO-NEXT: @.hip.binary_handle = internal global ptr null
+; HIP-MACHO-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
+
+; HIP-MACHO: define internal void @.hip.fatbin_reg() section "__TEXT,__StaticInit" {
+; HIP-MACHO-NEXT: entry:
+; HIP-MACHO-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
+; HIP-MACHO-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
+; HIP-MACHO-NEXT: call void @.hip.globals_reg(ptr %0)
+; HIP-MACHO-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
+; HIP-MACHO-NEXT: ret void
+; HIP-MACHO-NEXT: }
+
+; HIP-MACHO: define internal void @.hip.fatbin_unreg() section "__TEXT,__StaticInit" {
+; HIP-MACHO-NEXT: entry:
+; HIP-MACHO-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
+; HIP-MACHO-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
+; HIP-MACHO-NEXT: ret void
+; HIP-MACHO-NEXT: }
+
; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=cuda %s -o %t.bc
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA
>From 32f99bf0e0feb1123456dcbd40e22f7e9ea815dd Mon Sep 17 00:00:00 2001
From: Paulius Velesko <pvelesko at pglc.io>
Date: Fri, 3 Apr 2026 09:57:17 +0300
Subject: [PATCH 7/8] [Darwin] Initialize target platform in constructor from
triple
Instead of lazily initializing the Darwin target platform via
ensureTargetInitialized() guards scattered across multiple methods,
initialize it directly in the constructor from the triple. This
matches how other toolchains (Linux, Windows) work and ensures
the target is always initialized, even when Darwin is used as a
host toolchain for HIP/CUDA offloading where TranslateArgs /
AddDeploymentTarget may not run.
AddDeploymentTarget may later re-initialize with refined values
from command-line arguments and SDK detection. The setTarget assert
is relaxed to allow this re-initialization.
This removes ensureTargetInitialized() and all its callsites.
---
clang/lib/Driver/ToolChains/Darwin.cpp | 108 +++++++++++--------------
clang/lib/Driver/ToolChains/Darwin.h | 9 +--
2 files changed, 48 insertions(+), 69 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index 61b779c60b90f..ddfe8ee981169 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -979,7 +979,50 @@ AppleMachO::AppleMachO(const Driver &D, const llvm::Triple &Triple,
/// Darwin - Darwin tool chain for i386 and x86_64.
Darwin::Darwin(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
- : AppleMachO(D, Triple, Args), TargetInitialized(false) {}
+ : AppleMachO(D, Triple, Args), TargetInitialized(false) {
+ // Initialize the target platform from the triple. This provides a baseline
+ // so that methods like isTargetMacOSBased() work even if TranslateArgs /
+ // AddDeploymentTarget hasn't run yet (e.g. when Darwin is used as a host
+ // toolchain for HIP/CUDA offloading where BoundArch is empty).
+ // AddDeploymentTarget may later re-initialize with refined values from
+ // command-line arguments and SDK detection.
+ llvm::Triple::OSType OS = Triple.getOS();
+ DarwinPlatformKind Platform;
+ switch (OS) {
+ case llvm::Triple::Darwin:
+ case llvm::Triple::MacOSX:
+ Platform = MacOS;
+ break;
+ case llvm::Triple::IOS:
+ Platform = IPhoneOS;
+ break;
+ case llvm::Triple::TvOS:
+ Platform = TvOS;
+ break;
+ case llvm::Triple::WatchOS:
+ Platform = WatchOS;
+ break;
+ case llvm::Triple::XROS:
+ Platform = XROS;
+ break;
+ case llvm::Triple::DriverKit:
+ Platform = DriverKit;
+ break;
+ default:
+ return;
+ }
+
+ DarwinEnvironmentKind Environment = NativeEnvironment;
+ if (Triple.isSimulatorEnvironment())
+ Environment = Simulator;
+ else if (Triple.isMacCatalystEnvironment())
+ Environment = MacCatalyst;
+
+ VersionTuple OsVer = Triple.getOSVersion();
+ setTarget(Platform, Environment, OsVer.getMajor(),
+ OsVer.getMinor().value_or(0), OsVer.getSubminor().value_or(0),
+ VersionTuple());
+}
types::ID MachO::LookupTypeForExtension(StringRef Ext) const {
types::ID Ty = ToolChain::LookupTypeForExtension(Ext);
@@ -1140,50 +1183,6 @@ VersionTuple MachO::getLinkerVersion(const llvm::opt::ArgList &Args) const {
Darwin::~Darwin() {}
-void Darwin::ensureTargetInitialized() const {
- if (TargetInitialized)
- return;
-
- llvm::Triple::OSType OS = getTriple().getOS();
-
- DarwinPlatformKind Platform;
- switch (OS) {
- case llvm::Triple::Darwin:
- case llvm::Triple::MacOSX:
- Platform = MacOS;
- break;
- case llvm::Triple::IOS:
- Platform = IPhoneOS;
- break;
- case llvm::Triple::TvOS:
- Platform = TvOS;
- break;
- case llvm::Triple::WatchOS:
- Platform = WatchOS;
- break;
- case llvm::Triple::XROS:
- Platform = XROS;
- break;
- case llvm::Triple::DriverKit:
- Platform = DriverKit;
- break;
- default:
- // Unknown platform; leave uninitialized.
- return;
- }
-
- DarwinEnvironmentKind Environment = NativeEnvironment;
- if (getTriple().isSimulatorEnvironment())
- Environment = Simulator;
- else if (getTriple().isMacCatalystEnvironment())
- Environment = MacCatalyst;
-
- VersionTuple OsVer = getTriple().getOSVersion();
- setTarget(Platform, Environment, OsVer.getMajor(),
- OsVer.getMinor().value_or(0), OsVer.getSubminor().value_or(0),
- VersionTuple());
-}
-
AppleMachO::~AppleMachO() {}
MachO::~MachO() {}
@@ -1225,12 +1224,8 @@ std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args,
types::ID InputType) const {
llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
- // If the target isn't initialized (e.g., an unknown Darwin platform, return
- // the default triple). Note: we intentionally do NOT call
- // ensureTargetInitialized() here because this method is called before
- // AddDeploymentTarget() in some code paths (e.g. -print-libgcc-file-name),
- // and lazy init with version 0.0.0 would conflict with the real version
- // that AddDeploymentTarget() later sets via setTarget().
+ // If the target isn't initialized (e.g., an unknown Darwin platform), return
+ // the default triple.
if (!isTargetInitialized())
return Triple.getTriple();
@@ -1296,11 +1291,6 @@ void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const {
CC1Args.push_back("-Werror=undef-prefix");
// For modern targets, promote certain warnings to errors.
- // Lazily initialize the target if needed (e.g. when Darwin is used as
- // a host toolchain for device offloading).
- ensureTargetInitialized();
- if (!isTargetInitialized())
- return;
if (isTargetWatchOSBased() || getTriple().isArch64Bit()) {
// Always enable -Wdeprecated-objc-isa-usage and promote it
// to an error.
@@ -3998,9 +3988,6 @@ void Darwin::addStartObjectFileArgs(const ArgList &Args,
}
void Darwin::CheckObjCARC() const {
- ensureTargetInitialized();
- if (!isTargetInitialized())
- return;
if (isTargetIOSBased() || isTargetWatchOSBased() || isTargetXROS() ||
(isTargetMacOSBased() && !isMacosxVersionLT(10, 6)))
return;
@@ -4020,9 +4007,6 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
Res |= SanitizerKind::FuzzerNoLink;
Res |= SanitizerKind::ObjCCast;
- ensureTargetInitialized();
- if (!isTargetInitialized())
- return Res;
// Prior to 10.9, macOS shipped a version of the C++ standard library without
// C++11 support. The same is true of iOS prior to version 5. These OS'es are
// incompatible with -fsanitize=vptr.
diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h
index 89177b0455aca..60d724ca5d059 100644
--- a/clang/lib/Driver/ToolChains/Darwin.h
+++ b/clang/lib/Driver/ToolChains/Darwin.h
@@ -391,12 +391,6 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public AppleMachO {
void VerifyTripleForSDK(const llvm::opt::ArgList &Args,
const llvm::Triple Triple) const;
-protected:
- /// Lazily initialize the target platform from the triple when
- /// AddDeploymentTarget has not run yet (e.g. when Darwin is used as
- /// a host toolchain for device offloading).
- void ensureTargetInitialized() const;
-
public:
Darwin(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
@@ -449,7 +443,8 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public AppleMachO {
VersionTuple(Major, Minor, Micro))
return;
- assert(!TargetInitialized && "Target already initialized!");
+ // Allow re-initialization: the constructor sets a baseline from the triple,
+ // and AddDeploymentTarget may later refine it with args/SDK info.
TargetInitialized = true;
TargetPlatform = Platform;
TargetEnvironment = Environment;
>From 45e44b068e40d9b5b19627c88cf44e2385e35d66 Mon Sep 17 00:00:00 2001
From: Paulius Velesko <pvelesko at pglc.io>
Date: Fri, 3 Apr 2026 13:56:57 +0300
Subject: [PATCH 8/8] Revert "[Darwin] Initialize target platform in
constructor from triple"
This reverts commit 32f99bf0e0feb1123456dcbd40e22f7e9ea815dd.
---
clang/lib/Driver/ToolChains/Darwin.cpp | 108 ++++++++++++++-----------
clang/lib/Driver/ToolChains/Darwin.h | 9 ++-
2 files changed, 69 insertions(+), 48 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index ddfe8ee981169..61b779c60b90f 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -979,50 +979,7 @@ AppleMachO::AppleMachO(const Driver &D, const llvm::Triple &Triple,
/// Darwin - Darwin tool chain for i386 and x86_64.
Darwin::Darwin(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
- : AppleMachO(D, Triple, Args), TargetInitialized(false) {
- // Initialize the target platform from the triple. This provides a baseline
- // so that methods like isTargetMacOSBased() work even if TranslateArgs /
- // AddDeploymentTarget hasn't run yet (e.g. when Darwin is used as a host
- // toolchain for HIP/CUDA offloading where BoundArch is empty).
- // AddDeploymentTarget may later re-initialize with refined values from
- // command-line arguments and SDK detection.
- llvm::Triple::OSType OS = Triple.getOS();
- DarwinPlatformKind Platform;
- switch (OS) {
- case llvm::Triple::Darwin:
- case llvm::Triple::MacOSX:
- Platform = MacOS;
- break;
- case llvm::Triple::IOS:
- Platform = IPhoneOS;
- break;
- case llvm::Triple::TvOS:
- Platform = TvOS;
- break;
- case llvm::Triple::WatchOS:
- Platform = WatchOS;
- break;
- case llvm::Triple::XROS:
- Platform = XROS;
- break;
- case llvm::Triple::DriverKit:
- Platform = DriverKit;
- break;
- default:
- return;
- }
-
- DarwinEnvironmentKind Environment = NativeEnvironment;
- if (Triple.isSimulatorEnvironment())
- Environment = Simulator;
- else if (Triple.isMacCatalystEnvironment())
- Environment = MacCatalyst;
-
- VersionTuple OsVer = Triple.getOSVersion();
- setTarget(Platform, Environment, OsVer.getMajor(),
- OsVer.getMinor().value_or(0), OsVer.getSubminor().value_or(0),
- VersionTuple());
-}
+ : AppleMachO(D, Triple, Args), TargetInitialized(false) {}
types::ID MachO::LookupTypeForExtension(StringRef Ext) const {
types::ID Ty = ToolChain::LookupTypeForExtension(Ext);
@@ -1183,6 +1140,50 @@ VersionTuple MachO::getLinkerVersion(const llvm::opt::ArgList &Args) const {
Darwin::~Darwin() {}
+void Darwin::ensureTargetInitialized() const {
+ if (TargetInitialized)
+ return;
+
+ llvm::Triple::OSType OS = getTriple().getOS();
+
+ DarwinPlatformKind Platform;
+ switch (OS) {
+ case llvm::Triple::Darwin:
+ case llvm::Triple::MacOSX:
+ Platform = MacOS;
+ break;
+ case llvm::Triple::IOS:
+ Platform = IPhoneOS;
+ break;
+ case llvm::Triple::TvOS:
+ Platform = TvOS;
+ break;
+ case llvm::Triple::WatchOS:
+ Platform = WatchOS;
+ break;
+ case llvm::Triple::XROS:
+ Platform = XROS;
+ break;
+ case llvm::Triple::DriverKit:
+ Platform = DriverKit;
+ break;
+ default:
+ // Unknown platform; leave uninitialized.
+ return;
+ }
+
+ DarwinEnvironmentKind Environment = NativeEnvironment;
+ if (getTriple().isSimulatorEnvironment())
+ Environment = Simulator;
+ else if (getTriple().isMacCatalystEnvironment())
+ Environment = MacCatalyst;
+
+ VersionTuple OsVer = getTriple().getOSVersion();
+ setTarget(Platform, Environment, OsVer.getMajor(),
+ OsVer.getMinor().value_or(0), OsVer.getSubminor().value_or(0),
+ VersionTuple());
+}
+
AppleMachO::~AppleMachO() {}
MachO::~MachO() {}
@@ -1224,8 +1225,12 @@ std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args,
types::ID InputType) const {
llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
- // If the target isn't initialized (e.g., an unknown Darwin platform), return
- // the default triple.
+ // If the target isn't initialized (e.g., an unknown Darwin platform, return
+ // the default triple). Note: we intentionally do NOT call
+ // ensureTargetInitialized() here because this method is called before
+ // AddDeploymentTarget() in some code paths (e.g. -print-libgcc-file-name),
+ // and lazy init with version 0.0.0 would conflict with the real version
+ // that AddDeploymentTarget() later sets via setTarget().
if (!isTargetInitialized())
return Triple.getTriple();
@@ -1291,6 +1296,11 @@ void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const {
CC1Args.push_back("-Werror=undef-prefix");
// For modern targets, promote certain warnings to errors.
+ // Lazily initialize the target if needed (e.g. when Darwin is used as
+ // a host toolchain for device offloading).
+ ensureTargetInitialized();
+ if (!isTargetInitialized())
+ return;
if (isTargetWatchOSBased() || getTriple().isArch64Bit()) {
// Always enable -Wdeprecated-objc-isa-usage and promote it
// to an error.
@@ -3988,6 +3998,9 @@ void Darwin::addStartObjectFileArgs(const ArgList &Args,
}
void Darwin::CheckObjCARC() const {
+ ensureTargetInitialized();
+ if (!isTargetInitialized())
+ return;
if (isTargetIOSBased() || isTargetWatchOSBased() || isTargetXROS() ||
(isTargetMacOSBased() && !isMacosxVersionLT(10, 6)))
return;
@@ -4007,6 +4020,9 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
Res |= SanitizerKind::FuzzerNoLink;
Res |= SanitizerKind::ObjCCast;
+ ensureTargetInitialized();
+ if (!isTargetInitialized())
+ return Res;
// Prior to 10.9, macOS shipped a version of the C++ standard library without
// C++11 support. The same is true of iOS prior to version 5. These OS'es are
// incompatible with -fsanitize=vptr.
diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h
index 60d724ca5d059..89177b0455aca 100644
--- a/clang/lib/Driver/ToolChains/Darwin.h
+++ b/clang/lib/Driver/ToolChains/Darwin.h
@@ -391,6 +391,12 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public AppleMachO {
void VerifyTripleForSDK(const llvm::opt::ArgList &Args,
const llvm::Triple Triple) const;
+protected:
+ /// Lazily initialize the target platform from the triple when
+ /// AddDeploymentTarget has not run yet (e.g. when Darwin is used as
+ /// a host toolchain for device offloading).
+ void ensureTargetInitialized() const;
+
public:
Darwin(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
@@ -443,8 +449,7 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public AppleMachO {
VersionTuple(Major, Minor, Micro))
return;
- // Allow re-initialization: the constructor sets a baseline from the triple,
- // and AddDeploymentTarget may later refine it with args/SDK info.
+ assert(!TargetInitialized && "Target already initialized!");
TargetInitialized = true;
TargetPlatform = Platform;
TargetEnvironment = Environment;
More information about the llvm-commits
mailing list