[clang] [llvm] [HIP][MacOS] Mach-O support and Darwin toolchain fixes (PR #183991)

Paulius Velesko via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 1 07:05:03 PDT 2026


https://github.com/pvelesko updated https://github.com/llvm/llvm-project/pull/183991

>From 45ce6a73db8983607a3a62119c98d7db3c5e5292 Mon Sep 17 00:00:00 2001
From: Paulius Velesko <pvelesko at pglc.io>
Date: Sun, 1 Mar 2026 10:59:45 +0200
Subject: [PATCH 1/3] [Darwin] MacOS combatability with HIPSPV

CGCUDANV, HIPUtility: Use Mach-O segment,section format for HIP on macOS

[Driver][Darwin] Add ensureTargetInitialized() for lazy target init from triple

When Darwin is used as a host toolchain for device offloading (e.g.
HIP/CUDA), TranslateArgs may not run before methods that query the
target platform, leaving TargetInitialized false and triggering asserts.

Add ensureTargetInitialized() which infers the platform and version from
the triple, and call it from ComputeEffectiveClangTriple,
addClangWarningOptions, CheckObjCARC, and getSupportedSanitizers instead
of the previous isTargetInitialized() bail-out guards.

[Driver][Darwin] Skip host-stdlib flags when compiling device code

When DeviceOffloadKind != OFK_None, return early from
Darwin::addClangTargetOptions() after MachO flags. This skips
host-specific flags like -faligned-alloc-unavailable and
-fno-sized-deallocation that are irrelevant to device compilation
and break SPIR-V builds.

Add macOS handling for GPU bin handle section in HIP fat binary
---
 clang/lib/CodeGen/CGCUDANV.cpp             | 10 +++-
 clang/lib/Driver/ToolChains/Darwin.cpp     | 67 +++++++++++++++++++++-
 clang/lib/Driver/ToolChains/Darwin.h       |  6 ++
 clang/lib/Driver/ToolChains/HIPUtility.cpp | 13 +++--
 4 files changed, 88 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index e04da90b3cbf6..f08040d1d3d15 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -817,10 +817,14 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
   llvm::Constant *FatBinStr;
   unsigned FatMagic;
   if (IsHIP) {
-    FatbinConstantName = ".hip_fatbin";
-    FatbinSectionName = ".hipFatBinSegment";
+    // On macOS (Mach-O), section names must be in "segment,section" format.
+    FatbinConstantName =
+        CGM.getTriple().isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin";
+    FatbinSectionName =
+        CGM.getTriple().isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment";
 
-    ModuleIDSectionName = "__hip_module_id";
+    ModuleIDSectionName =
+        CGM.getTriple().isMacOSX() ? "__HIP,__module_id" : "__hip_module_id";
     ModuleIDPrefix = "__hip_";
 
     if (CudaGpuBinary) {
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index 8b000845fea18..61b779c60b90f 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -1140,6 +1140,50 @@ VersionTuple MachO::getLinkerVersion(const llvm::opt::ArgList &Args) const {
 
 Darwin::~Darwin() {}
 
+void Darwin::ensureTargetInitialized() const {
+  if (TargetInitialized)
+    return;
+
+  llvm::Triple::OSType OS = getTriple().getOS();
+
+  DarwinPlatformKind Platform;
+  switch (OS) {
+  case llvm::Triple::Darwin:
+  case llvm::Triple::MacOSX:
+    Platform = MacOS;
+    break;
+  case llvm::Triple::IOS:
+    Platform = IPhoneOS;
+    break;
+  case llvm::Triple::TvOS:
+    Platform = TvOS;
+    break;
+  case llvm::Triple::WatchOS:
+    Platform = WatchOS;
+    break;
+  case llvm::Triple::XROS:
+    Platform = XROS;
+    break;
+  case llvm::Triple::DriverKit:
+    Platform = DriverKit;
+    break;
+  default:
+    // Unknown platform; leave uninitialized.
+    return;
+  }
+
+  DarwinEnvironmentKind Environment = NativeEnvironment;
+  if (getTriple().isSimulatorEnvironment())
+    Environment = Simulator;
+  else if (getTriple().isMacCatalystEnvironment())
+    Environment = MacCatalyst;
+
+  VersionTuple OsVer = getTriple().getOSVersion();
+  setTarget(Platform, Environment, OsVer.getMajor(),
+            OsVer.getMinor().value_or(0), OsVer.getSubminor().value_or(0),
+            VersionTuple());
+}
+
 AppleMachO::~AppleMachO() {}
 
 MachO::~MachO() {}
@@ -1182,7 +1226,11 @@ std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args,
   llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
 
   // If the target isn't initialized (e.g., an unknown Darwin platform, return
-  // the default triple).
+  // the default triple). Note: we intentionally do NOT call
+  // ensureTargetInitialized() here because this method is called before
+  // AddDeploymentTarget() in some code paths (e.g. -print-libgcc-file-name),
+  // and lazy init with version 0.0.0 would conflict with the real version
+  // that AddDeploymentTarget() later sets via setTarget().
   if (!isTargetInitialized())
     return Triple.getTriple();
 
@@ -1248,6 +1296,11 @@ void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const {
   CC1Args.push_back("-Werror=undef-prefix");
 
   // For modern targets, promote certain warnings to errors.
+  // Lazily initialize the target if needed (e.g. when Darwin is used as
+  // a host toolchain for device offloading).
+  ensureTargetInitialized();
+  if (!isTargetInitialized())
+    return;
   if (isTargetWatchOSBased() || getTriple().isArch64Bit()) {
     // Always enable -Wdeprecated-objc-isa-usage and promote it
     // to an error.
@@ -3399,6 +3452,12 @@ void Darwin::addClangTargetOptions(
 
   MachO::addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind);
 
+  // When compiling device code (e.g. SPIR-V for HIP), skip host-specific
+  // flags like -faligned-alloc-unavailable and -fno-sized-deallocation
+  // that depend on the host OS version and are irrelevant to device code.
+  if (DeviceOffloadKind != Action::OFK_None)
+    return;
+
   // Pass "-faligned-alloc-unavailable" only when the user hasn't manually
   // enabled or disabled aligned allocations.
   if (!DriverArgs.hasArgNoClaim(options::OPT_faligned_allocation,
@@ -3939,6 +3998,9 @@ void Darwin::addStartObjectFileArgs(const ArgList &Args,
 }
 
 void Darwin::CheckObjCARC() const {
+  ensureTargetInitialized();
+  if (!isTargetInitialized())
+    return;
   if (isTargetIOSBased() || isTargetWatchOSBased() || isTargetXROS() ||
       (isTargetMacOSBased() && !isMacosxVersionLT(10, 6)))
     return;
@@ -3958,6 +4020,9 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
   Res |= SanitizerKind::FuzzerNoLink;
   Res |= SanitizerKind::ObjCCast;
 
+  ensureTargetInitialized();
+  if (!isTargetInitialized())
+    return Res;
   // Prior to 10.9, macOS shipped a version of the C++ standard library without
   // C++11 support. The same is true of iOS prior to version 5. These OS'es are
   // incompatible with -fsanitize=vptr.
diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h
index 75f1dff46bfa9..89177b0455aca 100644
--- a/clang/lib/Driver/ToolChains/Darwin.h
+++ b/clang/lib/Driver/ToolChains/Darwin.h
@@ -391,6 +391,12 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public AppleMachO {
   void VerifyTripleForSDK(const llvm::opt::ArgList &Args,
                           const llvm::Triple Triple) const;
 
+protected:
+  /// Lazily initialize the target platform from the triple when
+  /// AddDeploymentTarget has not run yet (e.g. when Darwin is used as
+  /// a host toolchain for device offloading).
+  void ensureTargetInitialized() const;
+
 public:
   Darwin(const Driver &D, const llvm::Triple &Triple,
          const llvm::opt::ArgList &Args);
diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp
index 1fcb36cc3a390..3bf0f23409f9f 100644
--- a/clang/lib/Driver/ToolChains/HIPUtility.cpp
+++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp
@@ -409,9 +409,11 @@ void HIP::constructGenerateObjFileFromHIPFatBinary(
   ObjStream << "# *** Automatically generated by Clang ***\n";
   if (FoundPrimaryGpuBinHandleSymbol) {
     // Define the first gpubin handle symbol
-    if (HostTriple.isWindowsMSVCEnvironment())
+    if (HostTriple.isWindowsMSVCEnvironment()) {
       ObjStream << "  .section .hip_gpubin_handle,\"dw\"\n";
-    else {
+    } else if (HostTriple.isMacOSX()) {
+      ObjStream << "  .section __HIP,__gpubin_handle\n";
+    } else {
       ObjStream << "  .protected " << PrimaryGpuBinHandleSymbol << "\n";
       ObjStream << "  .type " << PrimaryGpuBinHandleSymbol << ", at object\n";
       ObjStream << "  .section .hip_gpubin_handle,\"aw\"\n";
@@ -430,9 +432,12 @@ void HIP::constructGenerateObjFileFromHIPFatBinary(
   }
   if (FoundPrimaryHipFatbinSymbol) {
     // Define the first fatbin symbol
-    if (HostTriple.isWindowsMSVCEnvironment())
+    if (HostTriple.isWindowsMSVCEnvironment()) {
       ObjStream << "  .section .hip_fatbin,\"dw\"\n";
-    else {
+    } else if (HostTriple.isMacOSX()) {
+      // Mach-O requires "segment,section" format
+      ObjStream << "  .section __HIP,__hip_fatbin\n";
+    } else {
       ObjStream << "  .protected " << PrimaryHipFatbinSymbol << "\n";
       ObjStream << "  .type " << PrimaryHipFatbinSymbol << ", at object\n";
       ObjStream << "  .section .hip_fatbin,\"a\", at progbits\n";

>From f35d89db5a01a80b8c2deee988c56f46429b68a5 Mon Sep 17 00:00:00 2001
From: Paulius Velesko <pvelesko at pglc.io>
Date: Wed, 1 Apr 2026 09:11:23 +0300
Subject: [PATCH 2/3] [HIP][Darwin] Add Mach-O tests for offloading entries and
 fat binary sections

Add tests that verify Mach-O section naming for HIP offloading entries
and fat binary embedding. These tests expose missing Mach-O support in
emitOffloadingEntry(), getOffloadEntryArray(), and the linker wrapper's
createFatbinDesc() for HIP.
---
 clang/test/CodeGenCUDA/device-stub-macho.cu  | 28 +++++++++++++++++
 clang/test/CodeGenCUDA/offloading-entries.cu | 32 ++++++++++++++++++++
 clang/test/Driver/linker-wrapper-image.c     |  6 ++++
 3 files changed, 66 insertions(+)
 create mode 100644 clang/test/CodeGenCUDA/device-stub-macho.cu

diff --git a/clang/test/CodeGenCUDA/device-stub-macho.cu b/clang/test/CodeGenCUDA/device-stub-macho.cu
new file mode 100644
index 0000000000000..d53cefd58bfcd
--- /dev/null
+++ b/clang/test/CodeGenCUDA/device-stub-macho.cu
@@ -0,0 +1,28 @@
+// Verify that HIP fat binary sections use Mach-O "segment,section" format on Darwin.
+
+// RUN: echo -n "GPU binary would be here." > %t
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \
+// RUN:     -fcuda-include-gpubinary %t -o - -x hip \
+// RUN:   | FileCheck %s --check-prefix=HIPEF
+// RUN: %clang_cc1 -cuid=123 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \
+// RUN:     -o - -x hip \
+// RUN:   | FileCheck %s --check-prefix=HIPNEF
+
+#include "Inputs/cuda.h"
+
+__device__ int device_var;
+__constant__ int constant_var;
+
+// When fat binary is embedded, section names use Mach-O format.
+// HIPEF: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.",{{.*}}section "__HIP,__hip_fatbin"{{.*}}align 4096
+// HIPEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr }
+// HIPEF-SAME: section "__HIP,__fatbin"
+
+// When fat binary is external (no -fcuda-include-gpubinary), external symbol uses Mach-O section.
+// HIPNEF: @[[FATBIN:__hip_fatbin_[0-9a-f]+]] = external constant i8, section "__HIP,__hip_fatbin"
+// HIPNEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr }
+// HIPNEF-SAME: section "__HIP,__fatbin"
+
+__global__ void kernelfunc(int i, int j, int k) {}
+
+void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu
index fe03cc83b9d21..86ef3bcfa556b 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -11,6 +11,9 @@
 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \
 // RUN:   --offload-new-driver -emit-llvm -o - -x hip  %s | FileCheck \
 // RUN:   --check-prefix=HIP-COFF %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-macosx10.15.0 -fgpu-rdc \
+// RUN:   --offload-new-driver -emit-llvm -o - -x hip  %s | FileCheck \
+// RUN:   --check-prefix=HIP-MACHO %s
 
 #include "Inputs/cuda.h"
 
@@ -75,6 +78,21 @@
 // HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
 // HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries$OE"
 //.
+// HIP-MACHO: @managed.managed = global i32 0, align 4
+// HIP-MACHO: @managed = externally_initialized global ptr null
+// HIP-MACHO: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries"
+// HIP-MACHO: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries"
+// HIP-MACHO: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "__LLVM,offload_entries"
+// HIP-MACHO: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "__LLVM,offload_entries"
+// HIP-MACHO: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "__LLVM,offload_entries"
+// HIP-MACHO: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
+// HIP-MACHO: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "__LLVM,offload_entries"
+//.
 // CUDA-LABEL: @_Z18__device_stub__foov(
 // CUDA-NEXT:  entry:
 // CUDA-NEXT:    [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
@@ -103,6 +121,13 @@
 // HIP-COFF:       setup.end:
 // HIP-COFF-NEXT:    ret void
 //
+// HIP-MACHO-LABEL: @_Z18__device_stub__foov(
+// HIP-MACHO-NEXT:  entry:
+// HIP-MACHO-NEXT:    [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov)
+// HIP-MACHO-NEXT:    br label [[SETUP_END:%.*]]
+// HIP-MACHO:       setup.end:
+// HIP-MACHO-NEXT:    ret void
+//
 __global__ void foo() {}
 __device__ int var = 1;
 const __device__ int constant = 1;
@@ -137,6 +162,13 @@ __device__ __managed__ int managed = 0;
 // HIP-COFF:       setup.end:
 // HIP-COFF-NEXT:    ret void
 //
+// HIP-MACHO-LABEL: @_Z21__device_stub__kernelv(
+// HIP-MACHO-NEXT:  entry:
+// HIP-MACHO-NEXT:    [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv)
+// HIP-MACHO-NEXT:    br label [[SETUP_END:%.*]]
+// HIP-MACHO:       setup.end:
+// HIP-MACHO-NEXT:    ret void
+//
 __global__ void kernel() { external = 1; }
 
 surface<void> surf;
diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c
index 2c0df8c6be925..282eaa71ba8da 100644
--- a/clang/test/Driver/linker-wrapper-image.c
+++ b/clang/test/Driver/linker-wrapper-image.c
@@ -162,6 +162,8 @@
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF
+// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-apple-macosx10.15.0 \
+// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-MACHO
 
 //      HIP-ELF: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
 // HIP-ELF-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
@@ -170,6 +172,10 @@
 //      HIP-COFF: @__start_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OA"
 // HIP-COFF-NEXT: @__stop_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OZ"
 
+//      HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
+// HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
+// HIP-MACHO-NEXT: @__dummy.__LLVM,offload_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries"
+
 //      HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
 // HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
 // HIP-NEXT: @.hip.binary_handle = internal global ptr null

>From c2fbcbd7bebcde25a5c2ee0ab5cad51252d37488 Mon Sep 17 00:00:00 2001
From: Paulius Velesko <pvelesko at pglc.io>
Date: Wed, 1 Apr 2026 09:32:53 +0300
Subject: [PATCH 3/3] [HIP][Darwin] Add Mach-O support for offloading entries,
 fat binary sections, and startup functions

- Utility.cpp: emitOffloadingEntry() now emits to "__LLVM,offload_entries"
  section on Mach-O instead of the ELF-style "llvm_offload_entries".
- Utility.cpp: getOffloadEntryArray() uses Mach-O section boundary symbols
  (\1section$start$/__LLVM$offload_entries) and creates a dummy variable in
  the section to trigger linker symbol generation, following the same pattern
  as SanitizerCoverage.
- OffloadWrapper.cpp: HIP fat binary sections now use Mach-O segment,section
  format ("__HIP,__hip_fatbin", "__HIP,__fatbin").
- OffloadWrapper.cpp: All registration functions use "__TEXT,__StaticInit"
  instead of ".text.startup" on Mach-O to avoid invalid section specifier
  crashes during code generation.
---
 clang/test/Driver/linker-wrapper-image.c      | 28 +++++++++-----
 .../Frontend/Offloading/OffloadWrapper.cpp    | 28 ++++++++------
 llvm/lib/Frontend/Offloading/Utility.cpp      | 37 ++++++++++++++++---
 3 files changed, 66 insertions(+), 27 deletions(-)

diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c
index 282eaa71ba8da..731e2e860ff0b 100644
--- a/clang/test/Driver/linker-wrapper-image.c
+++ b/clang/test/Driver/linker-wrapper-image.c
@@ -174,15 +174,23 @@
 
 //      HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
 // HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
-// HIP-MACHO-NEXT: @__dummy.__LLVM,offload_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries"
+// HIP-MACHO-NEXT: @"__dummy.__LLVM,offload_entries" = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries"
 
-//      HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
-// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
-// HIP-NEXT: @.hip.binary_handle = internal global ptr null
+//      HIP-ELF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
+// HIP-ELF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
+// HIP-ELF-NEXT: @.hip.binary_handle = internal global ptr null
+
+//      HIP-COFF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
+// HIP-COFF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
+// HIP-COFF-NEXT: @.hip.binary_handle = internal global ptr null
+
+//      HIP-MACHO: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section "__HIP,__hip_fatbin"
+// HIP-MACHO-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section "__HIP,__fatbin", align 8
+// HIP-MACHO-NEXT: @.hip.binary_handle = internal global ptr null
 
 // HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
 
-//      HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
+//      HIP: define internal void @.hip.fatbin_reg() section "{{\.text\.startup|__TEXT,__StaticInit}}" {
 // HIP-NEXT: entry:
 // HIP-NEXT:   %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
 // HIP-NEXT:   store ptr %0, ptr @.hip.binary_handle, align 8
@@ -191,20 +199,20 @@
 // HIP-NEXT:   ret void
 // HIP-NEXT: }
 //
-//      HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
+//      HIP: define internal void @.hip.fatbin_unreg() section "{{\.text\.startup|__TEXT,__StaticInit}}" {
 // HIP-NEXT: entry:
 // HIP-NEXT:   %0 = load ptr, ptr @.hip.binary_handle, align 8
 // HIP-NEXT:   call void @__hipUnregisterFatBinary(ptr %0)
 // HIP-NEXT:   ret void
 // HIP-NEXT: }
 //
-//      HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" {
+//      HIP: define internal void @.hip.globals_reg(ptr %0) section "{{\.text\.startup|__TEXT,__StaticInit}}" {
 // HIP-NEXT: entry:
-// HIP-NEXT:   %1 = icmp ne ptr @__start_llvm_offload_entries, @__stop_llvm_offload_entries
+// HIP-NEXT:   %1 = icmp ne ptr @{{.*offload_entries.*}}, @{{.*offload_entries.*}}
 // HIP-NEXT:   br i1 %1, label %while.entry, label %while.end
 //
 //      HIP: while.entry:
-// HIP-NEXT:   %entry1 = phi ptr [ @__start_llvm_offload_entries, %entry ], [ %16, %if.end ]
+// HIP-NEXT:   %entry1 = phi ptr [ @{{.*offload_entries.*}}, %entry ], [ %16, %if.end ]
 // HIP-NEXT:   %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
 // HIP-NEXT:   %addr = load ptr, ptr %2, align 8
 // HIP-NEXT:   %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 8
@@ -264,7 +272,7 @@
 //
 //      HIP: if.end:
 // HIP-NEXT:   %16 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
-// HIP-NEXT:   %17 = icmp eq ptr %16, @__stop_llvm_offload_entries
+// HIP-NEXT:   %17 = icmp eq ptr %16, @{{.*offload_entries.*}}
 // HIP-NEXT:   br i1 %17, label %while.end, label %while.entry
 //
 //      HIP: while.end:
diff --git a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
index 83fc4c6e03178..60815e68f7e25 100644
--- a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
+++ b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
@@ -44,6 +44,12 @@ IntegerType *getSizeTTy(Module &M) {
   return M.getDataLayout().getIntPtrType(M.getContext());
 }
 
+/// Returns the appropriate startup section for registration functions.
+/// Mach-O uses "__TEXT,__StaticInit"; ELF/COFF use ".text.startup".
+StringRef getStartupSection(const Triple &T) {
+  return T.isOSBinFormatMachO() ? "__TEXT,__StaticInit" : ".text.startup";
+}
+
 // struct __tgt_device_image {
 //   void *ImageStart;
 //   void *ImageEnd;
@@ -207,7 +213,7 @@ Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
   auto *Func =
       Function::Create(FuncTy, GlobalValue::InternalLinkage,
                        ".omp_offloading.descriptor_unreg" + Suffix, &M);
-  Func->setSection(".text.startup");
+  Func->setSection(getStartupSection(M.getTargetTriple()));
 
   // Get __tgt_unregister_lib function declaration.
   auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
@@ -229,7 +235,7 @@ void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
   auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
   auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
                                 ".omp_offloading.descriptor_reg" + Suffix, &M);
-  Func->setSection(".text.startup");
+  Func->setSection(getStartupSection(M.getTargetTriple()));
 
   // Get __tgt_register_lib function declaration.
   auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
@@ -285,7 +291,7 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
 
   // Create the global string containing the fatbinary.
   StringRef FatbinConstantSection =
-      IsHIP ? ".hip_fatbin"
+      IsHIP ? (Triple.isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin")
             : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
   auto *Data = ConstantDataArray::get(C, Image);
   auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
@@ -294,9 +300,9 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
   Fatbin->setSection(FatbinConstantSection);
 
   // Create the fatbinary wrapper
-  StringRef FatbinWrapperSection = IsHIP               ? ".hipFatBinSegment"
-                                   : Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
-                                                       : ".nvFatBinSegment";
+  StringRef FatbinWrapperSection =
+      IsHIP ? (Triple.isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment")
+            : (Triple.isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment");
   Constant *FatbinWrapper[] = {
       ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
       ConstantInt::get(Type::getInt32Ty(C), 1),
@@ -403,7 +409,7 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
   auto *RegGlobalsFn =
       Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage,
                        IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
-  RegGlobalsFn->setSection(".text.startup");
+  RegGlobalsFn->setSection(getStartupSection(M.getTargetTriple()));
 
   // Create the loop to register all the entries.
   IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn));
@@ -559,13 +565,13 @@ void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
   auto *CtorFunc = Function::Create(
       CtorFuncTy, GlobalValue::InternalLinkage,
       (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M);
-  CtorFunc->setSection(".text.startup");
+  CtorFunc->setSection(getStartupSection(M.getTargetTriple()));
 
   auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
   auto *DtorFunc = Function::Create(
       DtorFuncTy, GlobalValue::InternalLinkage,
       (IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M);
-  DtorFunc->setSection(".text.startup");
+  DtorFunc->setSection(getStartupSection(M.getTargetTriple()));
 
   auto *PtrTy = PointerType::getUnqual(C);
 
@@ -702,7 +708,7 @@ class SYCLWrapper {
         FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
     Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
                                       Twine("sycl") + ".descriptor_reg", &M);
-    Func->setSection(".text.startup");
+    Func->setSection(getStartupSection(M.getTargetTriple()));
 
     // Get RegFuncName function declaration.
     FunctionType *RegFuncTy =
@@ -725,7 +731,7 @@ class SYCLWrapper {
         FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
     Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
                                       "sycl.descriptor_unreg", &M);
-    Func->setSection(".text.startup");
+    Func->setSection(getStartupSection(M.getTargetTriple()));
 
     // Get UnregFuncName function declaration.
     FunctionType *UnRegFuncTy =
diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp
index 44cef91bac495..db8b99bd0213d 100644
--- a/llvm/lib/Frontend/Offloading/Utility.cpp
+++ b/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -104,6 +104,8 @@ offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
   // The entry has to be created in the section the linker expects it to be.
   if (Triple.isOSBinFormatCOFF())
     Entry->setSection((SectionName + "$OE").str());
+  else if (Triple.isOSBinFormatMachO())
+    Entry->setSection("__LLVM,offload_entries");
   else
     Entry->setSection(SectionName);
   Entry->setAlignment(Align(object::OffloadBinary::getAlignment()));
@@ -121,13 +123,27 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
   auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
                                             : GlobalValue::ExternalLinkage;
 
-  auto *EntriesB =
-      new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
-                         "__start_" + SectionName);
+  // Mach-O uses linker-defined section boundary symbols with a special naming
+  // convention: \1section$start$<segment>$<section> (the \1 prefix suppresses
+  // the leading underscore that Mach-O normally adds to C symbols).
+  // See SanitizerCoverage for the existing precedent:
+  //   llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+  //   compiler-rt/lib/profile/InstrProfilingPlatformDarwin.c
+  std::string MachOSection = "__LLVM,offload_entries";
+  std::string StartName, StopName;
+  if (Triple.isOSBinFormatMachO()) {
+    StartName = "\1section$start$__LLVM$offload_entries";
+    StopName = "\1section$end$__LLVM$offload_entries";
+  } else {
+    StartName = ("__start_" + SectionName).str();
+    StopName = ("__stop_" + SectionName).str();
+  }
+
+  auto *EntriesB = new GlobalVariable(M, EntryType, /*isConstant=*/true,
+                                      Linkage, EntryInit, StartName);
   EntriesB->setVisibility(GlobalValue::HiddenVisibility);
-  auto *EntriesE =
-      new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
-                         "__stop_" + SectionName);
+  auto *EntriesE = new GlobalVariable(M, EntryType, /*isConstant=*/true,
+                                      Linkage, EntryInit, StopName);
   EntriesE->setVisibility(GlobalValue::HiddenVisibility);
 
   if (Triple.isOSBinFormatELF()) {
@@ -141,6 +157,15 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
     DummyEntry->setSection(SectionName);
     DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
     appendToCompilerUsed(M, DummyEntry);
+  } else if (Triple.isOSBinFormatMachO()) {
+    // Mach-O needs a dummy variable in the section (like ELF) to ensure the
+    // linker provides the section boundary symbols.
+    auto *DummyEntry = new GlobalVariable(
+        M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
+        ZeroInitilaizer, "__dummy." + MachOSection);
+    DummyEntry->setSection(MachOSection);
+    DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
+    appendToCompilerUsed(M, DummyEntry);
   } else {
     // The COFF linker will merge sections containing a '$' together into a
     // single section. The order of entries in this section will be sorted



More information about the llvm-commits mailing list