[llvm-branch-commits] [clang] clang/AMDGPU: Pass BoundArch through device libs handling (PR #196586)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri May 8 10:17:04 PDT 2026
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/196586
Pre-work to consolidate target identification ahead of future target
option bug fixes. This also requires updating flang to match the recent
clang changes.
Co-authored-by: Claude Sonnet 4 <noreply at anthropic.com>
From 89264ffcfcbae79d28a3ef8d5dc096e1c4012c37 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 8 May 2026 11:19:48 +0100
Subject: [PATCH] clang/AMDGPU: Pass BoundArch through device libs handling
Pre-work to consolidate target identification for future target
option bug fixes. Also requires updating flang to match recent
clang changes.
Co-authored-by: Claude Sonnet 4 <noreply at anthropic.com>
---
clang/include/clang/Driver/ToolChain.h | 2 +-
clang/lib/Driver/ToolChain.cpp | 2 +-
clang/lib/Driver/ToolChains/AMDGPU.cpp | 11 ++++++---
clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 11 ++++-----
clang/lib/Driver/ToolChains/AMDGPUOpenMP.h | 2 +-
clang/lib/Driver/ToolChains/Flang.cpp | 24 ++++++++++++--------
clang/lib/Driver/ToolChains/Flang.h | 16 ++++++++++---
clang/lib/Driver/ToolChains/HIPAMD.cpp | 15 ++++++++----
clang/lib/Driver/ToolChains/HIPAMD.h | 2 +-
clang/lib/Driver/ToolChains/HIPSPV.cpp | 4 ++--
clang/lib/Driver/ToolChains/HIPSPV.h | 2 +-
11 files changed, 57 insertions(+), 34 deletions(-)
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 117563ef5c950..e5daadba00e04 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -834,7 +834,7 @@ class ToolChain {
/// Get paths for device libraries.
virtual llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args,
+ getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch,
const Action::OffloadKind DeviceOffloadingKind) const;
/// Add the system specific libraries for the active offload kinds.
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index a68d39deda915..f920c2014c4a4 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -1799,7 +1799,7 @@ void ToolChain::addSYCLIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {}
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
-ToolChain::getDeviceLibs(const ArgList &DriverArgs,
+ToolChain::getDeviceLibs(const ArgList &DriverArgs, StringRef BoundArch,
const Action::OffloadKind DeviceOffloadingKind) const {
return {};
}
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 0f87043cf801e..06f9c6ac97047 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -982,9 +982,14 @@ void ROCMToolChain::addClangTargetOptions(
if (TT.getEnvironment() == llvm::Triple::LLVM)
return;
- // Get the device name and canonicalize it
- const StringRef GpuArch = getGPUArch(DriverArgs);
- auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
+ // Get the device name and canonicalize it. For offload compilation,
+ // BoundArch contains the full target ID. For non-offload (OpenCL),
+ // fall back to -mcpu.
+ StringRef GpuArch = BoundArch.empty() ? getGPUArch(DriverArgs) : BoundArch;
+
+ // Extract processor name for canonical arch lookup
+ StringRef Processor = getProcessorFromTargetID(getTriple(), GpuArch);
+ auto Kind = llvm::AMDGPU::parseArchAMDGCN(Processor);
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(CanonArch);
auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion(
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index 1f8b078dc4f44..c9965e8e6014b 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -43,7 +43,8 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
true))
return;
- for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) {
+ for (auto BCFile :
+ getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind)) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
@@ -126,17 +127,15 @@ AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
AMDGPUOpenMPToolChain::getDeviceLibs(
- const llvm::opt::ArgList &Args,
+ const llvm::opt::ArgList &Args, llvm::StringRef BoundArch,
const Action::OffloadKind DeviceOffloadingKind) const {
if (!Args.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true))
return {};
- StringRef GpuArch = getProcessorFromTargetID(
- getTriple(), Args.getLastArgValue(options::OPT_march_EQ));
-
+ StringRef GpuArch = getProcessorFromTargetID(getTriple(), BoundArch);
SmallVector<BitCodeLibraryInfo, 12> BCLibs;
for (auto BCLib :
- getCommonDeviceLibNames(Args, GpuArch.str(), DeviceOffloadingKind))
+ getCommonDeviceLibNames(Args, GpuArch, DeviceOffloadingKind))
BCLibs.emplace_back(BCLib);
return BCLibs;
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
index 4262640369780..4240e1e52f130 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
@@ -59,7 +59,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final
const llvm::opt::ArgList &Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args,
+ getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch,
const Action::OffloadKind DeviceOffloadKind) const override;
const ToolChain &HostTC;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 7a7a69e1b13d0..06ce0a419cc00 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -521,8 +521,9 @@ static void processVSRuntimeLibrary(const ToolChain &TC, const ArgList &Args,
}
}
-void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
- ArgStringList &CmdArgs) const {
+void Flang::AddAMDGPUTargetArgs(const ArgList &Args, ArgStringList &CmdArgs,
+ StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const {
if (Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {
StringRef Val = A->getValue();
CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
@@ -532,11 +533,12 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
}
const ToolChain &TC = getToolChain();
- TC.addClangTargetOptions(Args, CmdArgs, "", Action::OffloadKind::OFK_OpenMP);
+ TC.addClangTargetOptions(Args, CmdArgs, BoundArch, DeviceOffloadKind);
}
-void Flang::AddNVPTXTargetArgs(const ArgList &Args,
- ArgStringList &CmdArgs) const {
+void Flang::AddNVPTXTargetArgs(const ArgList &Args, ArgStringList &CmdArgs,
+ StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const {
// we cannot use addClangTargetOptions, as it appends unsupported args for
// flang: -fcuda-is-device, -fno-threadsafe-statics,
// -fcuda-allow-variadic-functions and -target-sdk-version Instead we manually
@@ -571,8 +573,9 @@ void Flang::AddNVPTXTargetArgs(const ArgList &Args,
CmdArgs.push_back(Args.MakeArgString(LibDeviceFile));
}
-void Flang::addTargetOptions(const ArgList &Args,
- ArgStringList &CmdArgs) const {
+void Flang::addTargetOptions(const ArgList &Args, ArgStringList &CmdArgs,
+ StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const {
const ToolChain &TC = getToolChain();
const llvm::Triple &Triple = TC.getEffectiveTriple();
const Driver &D = TC.getDriver();
@@ -598,11 +601,11 @@ void Flang::addTargetOptions(const ArgList &Args,
case llvm::Triple::r600:
case llvm::Triple::amdgcn:
getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
- AddAMDGPUTargetArgs(Args, CmdArgs);
+ AddAMDGPUTargetArgs(Args, CmdArgs, BoundArch, DeviceOffloadKind);
break;
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
- AddNVPTXTargetArgs(Args, CmdArgs);
+ AddNVPTXTargetArgs(Args, CmdArgs, BoundArch, DeviceOffloadKind);
break;
case llvm::Triple::riscv64:
getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
@@ -1105,7 +1108,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
addFloatingPointOptions(D, Args, CmdArgs);
// Add target args, features, etc.
- addTargetOptions(Args, CmdArgs);
+ addTargetOptions(Args, CmdArgs, JA.getOffloadingArch(),
+ JA.getOffloadingDeviceKind());
if (!TC.useIntegratedAs())
CmdArgs.push_back("-no-integrated-as");
diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h
index 62d2c6bb2a093..f08baa0fd5c12 100644
--- a/clang/lib/Driver/ToolChains/Flang.h
+++ b/clang/lib/Driver/ToolChains/Flang.h
@@ -61,8 +61,12 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
///
/// \param [in] Args The list of input driver arguments
/// \param [out] CmdArgs The list of output command arguments
+ /// \param [in] BoundArch The bound architecture for offload compilation
+ /// \param [in] DeviceOffloadKind The offload kind
void addTargetOptions(const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const;
+ llvm::opt::ArgStringList &CmdArgs,
+ llvm::StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const;
/// Add specific options for AArch64 target.
///
@@ -75,11 +79,17 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
///
/// \param [in] Args The list of input driver arguments
/// \param [out] CmdArgs The list of output command arguments
+ /// \param [in] BoundArch The bound architecture for offload compilation
+ /// \param [in] DeviceOffloadKind The offload kind
void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const;
+ llvm::opt::ArgStringList &CmdArgs,
+ llvm::StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const;
void AddNVPTXTargetArgs(const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const;
+ llvm::opt::ArgStringList &CmdArgs,
+ llvm::StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const;
/// Add specific options for LoongArch64 target.
///
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index 575666bc6e89a..966f1f4d1ec44 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -285,7 +285,8 @@ void HIPAMDToolChain::addClangTargetOptions(
return; // No DeviceLibs for SPIR-V.
}
- for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) {
+ for (auto BCFile :
+ getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind)) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
@@ -377,14 +378,20 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs,
+ llvm::StringRef BoundArch,
Action::OffloadKind DeviceOffloadingKind) const {
+ assert(!BoundArch.empty() && "Must have an explicit GPU arch.");
+
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
const llvm::Triple &TT = getEffectiveTriple();
if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
true) ||
- TT.getEnvironment() == llvm::Triple::LLVM ||
- getGPUArch(DriverArgs) == "amdgcnspirv")
+ TT.getEnvironment() == llvm::Triple::LLVM)
+ return {};
+
+ StringRef GpuArch = getProcessorFromTargetID(getTriple(), BoundArch);
+ if (GpuArch == "amdgcnspirv")
return {};
ArgStringList LibraryPaths;
@@ -418,8 +425,6 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs,
getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
return {};
}
- StringRef GpuArch = getGPUArch(DriverArgs);
- assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
// Add common device libraries like ocml etc.
for (auto N :
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h
index f52c9b19ff74e..ef0ca7c5deb1d 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.h
+++ b/clang/lib/Driver/ToolChains/HIPAMD.h
@@ -82,7 +82,7 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain {
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args,
+ getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
SanitizerMask getSupportedSanitizers() const override;
diff --git a/clang/lib/Driver/ToolChains/HIPSPV.cpp b/clang/lib/Driver/ToolChains/HIPSPV.cpp
index f3718e87a7b5a..86cad9d6fff73 100644
--- a/clang/lib/Driver/ToolChains/HIPSPV.cpp
+++ b/clang/lib/Driver/ToolChains/HIPSPV.cpp
@@ -181,7 +181,7 @@ void HIPSPVToolChain::addClangTargetOptions(
{"-fvisibility=hidden", "-fapply-global-visibility-to-externs"});
for (const BitCodeLibraryInfo &BCFile :
- getDeviceLibs(DriverArgs, DeviceOffloadingKind))
+ getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind))
CC1Args.append(
{"-mlink-builtin-bitcode", DriverArgs.MakeArgString(BCFile.Path)});
}
@@ -243,7 +243,7 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPSPVToolChain::getDeviceLibs(
- const llvm::opt::ArgList &DriverArgs,
+ const llvm::opt::ArgList &DriverArgs, llvm::StringRef BoundArch,
const Action::OffloadKind DeviceOffloadingKind) const {
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs;
if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h b/clang/lib/Driver/ToolChains/HIPSPV.h
index 7f823d3a8c8ca..661744ff11a7e 100644
--- a/clang/lib/Driver/ToolChains/HIPSPV.h
+++ b/clang/lib/Driver/ToolChains/HIPSPV.h
@@ -73,7 +73,7 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain {
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args,
+ getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch,
const Action::OffloadKind DeviceOffloadKind) const override;
SanitizerMask getSupportedSanitizers() const override;
More information about the llvm-branch-commits
mailing list