[clang] [Driver][HIP] Bundle AMDGPU -S output under the new offload driver (PR #188262)

via cfe-commits cfe-commits at lists.llvm.org
Tue Mar 24 07:57:57 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Yaxun (Sam) Liu (yxsamliu)

<details>
<summary>Changes</summary>

[Driver][HIP] Bundle AMDGPU -S output under the new offload driver

The old offload driver emits bundled assembly code for -S in textual
clang-offload-bundler format. This allows a single .s file to contain assembly
code for both host and devices, which can be consumed by clang. This eases
manual optimization of assembly code for host and device. There are existing
HIP tests and examples depending on this feature. The new offload driver does
not support it, causing regressions. This patch adds support for this feature
with minor changes to the job action creations.

Fixes: LCOMPILER-553



---
Full diff: https://github.com/llvm/llvm-project/pull/188262.diff


3 Files Affected:

- (modified) clang/include/clang/Driver/Driver.h (+5-1) 
- (modified) clang/lib/Driver/Driver.cpp (+55-3) 
- (modified) clang/test/Driver/hip-phases.hip (+3) 


``````````diff
diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index b355ee6e15007..e9a9914647cb2 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -524,10 +524,14 @@ class Driver {
   /// \param Input - The input type and arguments
   /// \param CUID - The CUID for \p Input
   /// \param HostAction - The host action used in the offloading toolchain.
+  /// \param HIPAsmBundleDeviceOut - If non-null, HIP non-RDC \c -S (AMDGCN)
+  /// device actions are appended here and \p HostAction is returned unchanged
+  /// so the caller can emit a bundled \c .s via \c OffloadBundlingJobAction.
   Action *BuildOffloadingActions(Compilation &C,
                                  llvm::opt::DerivedArgList &Args,
                                  const InputTy &Input, StringRef CUID,
-                                 Action *HostAction) const;
+                                 Action *HostAction,
+                                 ActionList *HIPAsmBundleDeviceOut = nullptr) const;
 
   /// Returns the set of bound architectures active for this offload kind.
   /// If there are no bound architctures we return a set containing only the
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 3e2553686d87f..0f47009470595 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4439,6 +4439,35 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
   }
 }
 
+/// HIP non-RDC \c -S for AMDGCN: emit host and device assembly separately and
+/// bundle with \c clang-offload-bundler (new offload driver), instead of
+/// \c llvm-offload-binary / \c clang-linker-wrapper fatbin embedding.
+static bool shouldBundleHIPAsmWithNewDriver(const Compilation &C,
+                                            const llvm::opt::DerivedArgList &Args,
+                                            const Driver &D) {
+  if (!C.isOffloadingHostKind(Action::OFK_HIP))
+    return false;
+  if (!Args.hasArg(options::OPT_S) || Args.hasArg(options::OPT_emit_llvm))
+    return false;
+  if (D.offloadDeviceOnly())
+    return false;
+  if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+    return false;
+
+  bool HasAMDGCNHIPDevice = false;
+  auto HIPTCs = C.getOffloadToolChains(Action::OFK_HIP);
+  for (auto It = HIPTCs.first; It != HIPTCs.second; ++It) {
+    const ToolChain *TC = It->second;
+    if (!TC)
+      continue;
+    const llvm::Triple &Tr = TC->getTriple();
+    if (Tr.isSPIRV() || Tr.getArch() != llvm::Triple::amdgcn)
+      return false;
+    HasAMDGCNHIPDevice = true;
+  }
+  return HasAMDGCNHIPDevice;
+}
+
 void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
                           const InputList &Inputs, ActionList &Actions) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
@@ -4484,6 +4513,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
       cast<InputAction>(Current)->setId(CUID);
     }
 
+    ActionList HIPAsmDeviceActions;
+
     // Use the current host action in any of the offloading actions, if
     // required.
     if (!UseNewOffloadingDriver)
@@ -4547,7 +4578,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
       // Try to build the offloading actions and add the result as a dependency
       // to the host.
       if (UseNewOffloadingDriver)
-        Current = BuildOffloadingActions(C, Args, I, CUID, Current);
+        Current = BuildOffloadingActions(C, Args, I, CUID, Current,
+                                         &HIPAsmDeviceActions);
       // Use the current host action in any of the offloading actions, if
       // required.
       else if (OffloadBuilder->addHostDependenceToDeviceActions(Current,
@@ -4558,6 +4590,16 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
         break;
     }
 
+    // HIP non-RDC -S (AMDGCN): bundle host and device assembly like the
+    // classic driver instead of embedding a fat binary in host asm.
+    if (Current && !HIPAsmDeviceActions.empty()) {
+      assert(UseNewOffloadingDriver && "unexpected HIP asm bundle list");
+      ActionList BundleInputs;
+      BundleInputs.append(HIPAsmDeviceActions);
+      BundleInputs.push_back(Current);
+      Current = C.MakeAction<OffloadBundlingJobAction>(BundleInputs);
+    }
+
     // If we ended with something, add to the output list.
     if (Current)
       Actions.push_back(Current);
@@ -4900,7 +4942,8 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
 Action *Driver::BuildOffloadingActions(Compilation &C,
                                        llvm::opt::DerivedArgList &Args,
                                        const InputTy &Input, StringRef CUID,
-                                       Action *HostAction) const {
+                                       Action *HostAction,
+                                       ActionList *HIPAsmBundleDeviceOut) const {
   // Don't build offloading actions if explicitly disabled or we do not have a
   // valid source input.
   if (offloadHostOnly() || !types::isSrcFile(Input.first))
@@ -5104,6 +5147,13 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
              *C.getOffloadToolChains<Action::OFK_HIP>().first->second, nullptr,
              Action::OFK_HIP);
   } else if (HIPNoRDC) {
+    // Host + device assembly: defer to clang-offload-bundler (see BuildActions).
+    if (HIPAsmBundleDeviceOut &&
+        shouldBundleHIPAsmWithNewDriver(C, Args, C.getDriver())) {
+      for (Action *OA : OffloadActions)
+        HIPAsmBundleDeviceOut->push_back(OA);
+      return HostAction;
+    }
     // Package all the offloading actions into a single output that can be
     // embedded in the host and linked.
     Action *PackagerAction =
@@ -5257,7 +5307,9 @@ Action *Driver::ConstructPhaseAction(
         Args.hasFlag(options::OPT_offload_new_driver,
                      options::OPT_no_offload_new_driver,
                      C.getActiveOffloadKinds() != Action::OFK_None) &&
-        !offloadDeviceOnly() && !isSaveTempsEnabled())
+        !offloadDeviceOnly() && !isSaveTempsEnabled() &&
+        !(Args.hasArg(options::OPT_S) &&
+          !Args.hasArg(options::OPT_emit_llvm)))
       return Input;
 
     if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
index 47e4bfca68a39..ae4a2a6e53cda 100644
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -67,6 +67,9 @@
 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s -S 2>&1 \
 // RUN: | FileCheck -check-prefixes=ASM %s
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s -S 2>&1 \
+// RUN: | FileCheck -check-prefixes=ASM %s
 // ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])

``````````

</details>


https://github.com/llvm/llvm-project/pull/188262


More information about the cfe-commits mailing list