[clang] [Driver][HIP] Bundle AMDGPU -S output under the new offload driver (PR #188262)

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 26 07:29:36 PDT 2026


https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/188262

>From b211570a8fddc0b4e674bbd1fd733520a95aae56 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Mon, 23 Mar 2026 13:38:21 -0400
Subject: [PATCH 1/2] [Driver][HIP] Bundle AMDGPU -S output under the new
 offload driver

The old offload driver emits bundled assembly code for -S in textual
clang-offload-bundler format. This allows a single .s file to contain assembly
code for both host and devices, which can be consumed by clang. This eases
manual optimization of assembly code for host and device. There are existing
HIP tests and examples depending on this feature. The new offload driver does
not support it, causing regressions. This patch adds support for this feature
with minor changes to the job action creations.

Fixes: LCOMPILER-553
---
 clang/include/clang/Driver/Driver.h |  6 ++-
 clang/lib/Driver/Driver.cpp         | 58 +++++++++++++++++++++++++++--
 clang/test/Driver/hip-phases.hip    |  3 ++
 3 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index b355ee6e15007..e9a9914647cb2 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -524,10 +524,14 @@ class Driver {
   /// \param Input - The input type and arguments
   /// \param CUID - The CUID for \p Input
   /// \param HostAction - The host action used in the offloading toolchain.
+  /// \param HIPAsmBundleDeviceOut - If non-null, HIP non-RDC \c -S (AMDGCN)
+  /// device actions are appended here and \p HostAction is returned unchanged
+  /// so the caller can emit a bundled \c .s via \c OffloadBundlingJobAction.
   Action *BuildOffloadingActions(Compilation &C,
                                  llvm::opt::DerivedArgList &Args,
                                  const InputTy &Input, StringRef CUID,
-                                 Action *HostAction) const;
+                                 Action *HostAction,
+                                 ActionList *HIPAsmBundleDeviceOut = nullptr) const;
 
   /// Returns the set of bound architectures active for this offload kind.
   /// If there are no bound architctures we return a set containing only the
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 3e2553686d87f..0f47009470595 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4439,6 +4439,35 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
   }
 }
 
+/// HIP non-RDC \c -S for AMDGCN: emit host and device assembly separately and
+/// bundle with \c clang-offload-bundler (new offload driver), instead of
+/// \c llvm-offload-binary / \c clang-linker-wrapper fatbin embedding.
+static bool shouldBundleHIPAsmWithNewDriver(const Compilation &C,
+                                            const llvm::opt::DerivedArgList &Args,
+                                            const Driver &D) {
+  if (!C.isOffloadingHostKind(Action::OFK_HIP))
+    return false;
+  if (!Args.hasArg(options::OPT_S) || Args.hasArg(options::OPT_emit_llvm))
+    return false;
+  if (D.offloadDeviceOnly())
+    return false;
+  if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+    return false;
+
+  bool HasAMDGCNHIPDevice = false;
+  auto HIPTCs = C.getOffloadToolChains(Action::OFK_HIP);
+  for (auto It = HIPTCs.first; It != HIPTCs.second; ++It) {
+    const ToolChain *TC = It->second;
+    if (!TC)
+      continue;
+    const llvm::Triple &Tr = TC->getTriple();
+    if (Tr.isSPIRV() || Tr.getArch() != llvm::Triple::amdgcn)
+      return false;
+    HasAMDGCNHIPDevice = true;
+  }
+  return HasAMDGCNHIPDevice;
+}
+
 void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
                           const InputList &Inputs, ActionList &Actions) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
@@ -4484,6 +4513,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
       cast<InputAction>(Current)->setId(CUID);
     }
 
+    ActionList HIPAsmDeviceActions;
+
     // Use the current host action in any of the offloading actions, if
     // required.
     if (!UseNewOffloadingDriver)
@@ -4547,7 +4578,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
       // Try to build the offloading actions and add the result as a dependency
       // to the host.
       if (UseNewOffloadingDriver)
-        Current = BuildOffloadingActions(C, Args, I, CUID, Current);
+        Current = BuildOffloadingActions(C, Args, I, CUID, Current,
+                                         &HIPAsmDeviceActions);
       // Use the current host action in any of the offloading actions, if
       // required.
       else if (OffloadBuilder->addHostDependenceToDeviceActions(Current,
@@ -4558,6 +4590,16 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
         break;
     }
 
+    // HIP non-RDC -S (AMDGCN): bundle host and device assembly like the
+    // classic driver instead of embedding a fat binary in host asm.
+    if (Current && !HIPAsmDeviceActions.empty()) {
+      assert(UseNewOffloadingDriver && "unexpected HIP asm bundle list");
+      ActionList BundleInputs;
+      BundleInputs.append(HIPAsmDeviceActions);
+      BundleInputs.push_back(Current);
+      Current = C.MakeAction<OffloadBundlingJobAction>(BundleInputs);
+    }
+
     // If we ended with something, add to the output list.
     if (Current)
       Actions.push_back(Current);
@@ -4900,7 +4942,8 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
 Action *Driver::BuildOffloadingActions(Compilation &C,
                                        llvm::opt::DerivedArgList &Args,
                                        const InputTy &Input, StringRef CUID,
-                                       Action *HostAction) const {
+                                       Action *HostAction,
+                                       ActionList *HIPAsmBundleDeviceOut) const {
   // Don't build offloading actions if explicitly disabled or we do not have a
   // valid source input.
   if (offloadHostOnly() || !types::isSrcFile(Input.first))
@@ -5104,6 +5147,13 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
              *C.getOffloadToolChains<Action::OFK_HIP>().first->second, nullptr,
              Action::OFK_HIP);
   } else if (HIPNoRDC) {
+    // Host + device assembly: defer to clang-offload-bundler (see BuildActions).
+    if (HIPAsmBundleDeviceOut &&
+        shouldBundleHIPAsmWithNewDriver(C, Args, C.getDriver())) {
+      for (Action *OA : OffloadActions)
+        HIPAsmBundleDeviceOut->push_back(OA);
+      return HostAction;
+    }
     // Package all the offloading actions into a single output that can be
     // embedded in the host and linked.
     Action *PackagerAction =
@@ -5257,7 +5307,9 @@ Action *Driver::ConstructPhaseAction(
         Args.hasFlag(options::OPT_offload_new_driver,
                      options::OPT_no_offload_new_driver,
                      C.getActiveOffloadKinds() != Action::OFK_None) &&
-        !offloadDeviceOnly() && !isSaveTempsEnabled())
+        !offloadDeviceOnly() && !isSaveTempsEnabled() &&
+        !(Args.hasArg(options::OPT_S) &&
+          !Args.hasArg(options::OPT_emit_llvm)))
       return Input;
 
     if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
index 47e4bfca68a39..ae4a2a6e53cda 100644
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -67,6 +67,9 @@
 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s -S 2>&1 \
 // RUN: | FileCheck -check-prefixes=ASM %s
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s -S 2>&1 \
+// RUN: | FileCheck -check-prefixes=ASM %s
 // ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])

>From fb1aea321684e817d6c99f27136fd46f5b0f7ee1 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Thu, 26 Mar 2026 10:25:50 -0400
Subject: [PATCH 2/2] Apply clang-format and address Joseph Huber review (PR
 #188262)

- Run clang-format (fix code_formatter CI on PR #188262).
- Combine early guards in shouldBundleHIPAsmWithNewDriver into one condition.
---
 clang/include/clang/Driver/Driver.h | 10 ++++-----
 clang/lib/Driver/Driver.cpp         | 34 ++++++++++++++---------------
 2 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index e9a9914647cb2..5490e06b6f3f3 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -527,11 +527,11 @@ class Driver {
   /// \param HIPAsmBundleDeviceOut - If non-null, HIP non-RDC \c -S (AMDGCN)
   /// device actions are appended here and \p HostAction is returned unchanged
   /// so the caller can emit a bundled \c .s via \c OffloadBundlingJobAction.
-  Action *BuildOffloadingActions(Compilation &C,
-                                 llvm::opt::DerivedArgList &Args,
-                                 const InputTy &Input, StringRef CUID,
-                                 Action *HostAction,
-                                 ActionList *HIPAsmBundleDeviceOut = nullptr) const;
+  Action *
+  BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args,
+                         const InputTy &Input, StringRef CUID,
+                         Action *HostAction,
+                         ActionList *HIPAsmBundleDeviceOut = nullptr) const;
 
   /// Returns the set of bound architectures active for this offload kind.
   /// If there are no bound architctures we return a set containing only the
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 0f47009470595..ba5e50381c9f8 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4442,16 +4442,14 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
 /// HIP non-RDC \c -S for AMDGCN: emit host and device assembly separately and
 /// bundle with \c clang-offload-bundler (new offload driver), instead of
 /// \c llvm-offload-binary / \c clang-linker-wrapper fatbin embedding.
-static bool shouldBundleHIPAsmWithNewDriver(const Compilation &C,
-                                            const llvm::opt::DerivedArgList &Args,
-                                            const Driver &D) {
-  if (!C.isOffloadingHostKind(Action::OFK_HIP))
-    return false;
-  if (!Args.hasArg(options::OPT_S) || Args.hasArg(options::OPT_emit_llvm))
-    return false;
-  if (D.offloadDeviceOnly())
-    return false;
-  if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+static bool
+shouldBundleHIPAsmWithNewDriver(const Compilation &C,
+                                const llvm::opt::DerivedArgList &Args,
+                                const Driver &D) {
+  if (!C.isOffloadingHostKind(Action::OFK_HIP) ||
+      !Args.hasArg(options::OPT_S) || Args.hasArg(options::OPT_emit_llvm) ||
+      D.offloadDeviceOnly() ||
+      Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
     return false;
 
   bool HasAMDGCNHIPDevice = false;
@@ -4939,11 +4937,11 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
   return Sorted;
 }
 
-Action *Driver::BuildOffloadingActions(Compilation &C,
-                                       llvm::opt::DerivedArgList &Args,
-                                       const InputTy &Input, StringRef CUID,
-                                       Action *HostAction,
-                                       ActionList *HIPAsmBundleDeviceOut) const {
+Action *
+Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args,
+                               const InputTy &Input, StringRef CUID,
+                               Action *HostAction,
+                               ActionList *HIPAsmBundleDeviceOut) const {
   // Don't build offloading actions if explicitly disabled or we do not have a
   // valid source input.
   if (offloadHostOnly() || !types::isSrcFile(Input.first))
@@ -5147,7 +5145,8 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
              *C.getOffloadToolChains<Action::OFK_HIP>().first->second, nullptr,
              Action::OFK_HIP);
   } else if (HIPNoRDC) {
-    // Host + device assembly: defer to clang-offload-bundler (see BuildActions).
+    // Host + device assembly: defer to clang-offload-bundler (see
+    // BuildActions).
     if (HIPAsmBundleDeviceOut &&
         shouldBundleHIPAsmWithNewDriver(C, Args, C.getDriver())) {
       for (Action *OA : OffloadActions)
@@ -5308,8 +5307,7 @@ Action *Driver::ConstructPhaseAction(
                      options::OPT_no_offload_new_driver,
                      C.getActiveOffloadKinds() != Action::OFK_None) &&
         !offloadDeviceOnly() && !isSaveTempsEnabled() &&
-        !(Args.hasArg(options::OPT_S) &&
-          !Args.hasArg(options::OPT_emit_llvm)))
+        !(Args.hasArg(options::OPT_S) && !Args.hasArg(options::OPT_emit_llvm)))
       return Input;
 
     if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {



More information about the cfe-commits mailing list