[clang] clang/AMDGPU: Pass BoundArch through device libs handling (PR #196586)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 10 09:14:27 PDT 2026
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/196586
>From 03adb2dafef1e06eb631d8f2a6f0b05b9cb8f426 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 8 May 2026 11:19:48 +0100
Subject: [PATCH] clang/AMDGPU: Pass BoundArch through device libs handling
This is preparatory work toward consolidating target identification,
which future target option bug fixes will build on. It also requires
updating flang to match the recent clang changes.
Co-authored-by: Claude Sonnet 4 <noreply at anthropic.com>
---
clang/include/clang/Driver/ToolChain.h | 2 +-
clang/lib/Driver/ToolChain.cpp | 2 +-
clang/lib/Driver/ToolChains/AMDGPU.cpp | 14 ++++++++----
clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 13 ++++-------
clang/lib/Driver/ToolChains/AMDGPUOpenMP.h | 2 +-
clang/lib/Driver/ToolChains/Flang.cpp | 24 ++++++++++++--------
clang/lib/Driver/ToolChains/Flang.h | 16 ++++++++++---
clang/lib/Driver/ToolChains/HIPAMD.cpp | 15 +++++++-----
clang/lib/Driver/ToolChains/HIPAMD.h | 2 +-
clang/lib/Driver/ToolChains/HIPSPV.cpp | 4 ++--
clang/lib/Driver/ToolChains/HIPSPV.h | 2 +-
11 files changed, 57 insertions(+), 39 deletions(-)
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 453af0783b445..8953c299268df 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -868,7 +868,7 @@ class ToolChain {
/// Get paths for device libraries.
virtual llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args,
+ getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch,
const Action::OffloadKind DeviceOffloadingKind) const;
/// Add the system specific libraries for the active offload kinds.
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 67aa642c464e2..8ba82ea10b578 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -1871,7 +1871,7 @@ void ToolChain::addSYCLIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {}
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
-ToolChain::getDeviceLibs(const ArgList &DriverArgs,
+ToolChain::getDeviceLibs(const ArgList &DriverArgs, StringRef BoundArch,
const Action::OffloadKind DeviceOffloadingKind) const {
return {};
}
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 47df326fca9be..533de5416eeb9 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -981,11 +981,16 @@ void ROCMToolChain::addClangTargetOptions(
if (TT.getEnvironment() == llvm::Triple::LLVM)
return;
- AMDGPUToolChain::ParsedTargetIDType TargetID = getParsedTargetID(DriverArgs);
- StringRef GpuArch =
- TargetID.OptionalGPUArch ? *TargetID.OptionalGPUArch : StringRef();
+ // Get the device name and canonicalize it. For offload compilation,
+ // BoundArch contains the full target ID. For non-offload (OpenCL),
+ // fall back to -mcpu.
+ StringRef TargetID = BoundArch.empty()
+ ? DriverArgs.getLastArgValue(options::OPT_mcpu_EQ)
+ : BoundArch;
+ StringRef GpuArch = getProcessorFromTargetID(getTriple(), TargetID);
StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(GpuArch);
+
auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion(
getAMDGPUCodeObjectVersion(getDriver(), DriverArgs));
if (!RocmInstallation->checkCommonBitcodeLibs(GpuArch, LibDeviceFile, ABIVer))
@@ -998,8 +1003,7 @@ void ROCMToolChain::addClangTargetOptions(
// Add the generic set of libraries.
BCLibs.append(RocmInstallation->getCommonBitcodeLibs(
DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind,
- getSanitizerArgs(DriverArgs, TargetID.OptionalTargetID.value_or(""),
- DeviceOffloadingKind)
+ getSanitizerArgs(DriverArgs, TargetID, DeviceOffloadingKind)
.needsAsanRt()));
for (auto [BCFile, Internalize] : BCLibs) {
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index 1d3568321438f..e7a169a374464 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -43,7 +43,8 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
true))
return;
- for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) {
+ for (auto BCFile :
+ getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind)) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
@@ -122,19 +123,15 @@ AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
AMDGPUOpenMPToolChain::getDeviceLibs(
- const llvm::opt::ArgList &Args,
+ const llvm::opt::ArgList &Args, llvm::StringRef BoundArch,
const Action::OffloadKind DeviceOffloadingKind) const {
if (!Args.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true))
return {};
- AMDGPUToolChain::ParsedTargetIDType TargetID = getParsedTargetID(Args);
- if (!TargetID.OptionalTargetID)
- return {};
-
+ StringRef GpuArch = getProcessorFromTargetID(getTriple(), BoundArch);
SmallVector<BitCodeLibraryInfo, 12> BCLibs;
for (auto BCLib :
- getCommonDeviceLibNames(Args, *TargetID.OptionalTargetID,
- *TargetID.OptionalGPUArch, DeviceOffloadingKind))
+ getCommonDeviceLibNames(Args, BoundArch, GpuArch, DeviceOffloadingKind))
BCLibs.emplace_back(BCLib);
return BCLibs;
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
index b1bcb08899c46..7e212f15a9ebc 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
@@ -58,7 +58,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final
const llvm::opt::ArgList &Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args,
+ getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch,
const Action::OffloadKind DeviceOffloadKind) const override;
/// OpenMP uses LTO by default to link device bitcode.
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index c713b7904b22c..fcbbcbcc7018b 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -522,8 +522,9 @@ static void processVSRuntimeLibrary(const ToolChain &TC, const ArgList &Args,
}
}
-void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
- ArgStringList &CmdArgs) const {
+void Flang::AddAMDGPUTargetArgs(const ArgList &Args, ArgStringList &CmdArgs,
+ StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const {
if (Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {
StringRef Val = A->getValue();
CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
@@ -533,11 +534,12 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
}
const ToolChain &TC = getToolChain();
- TC.addClangTargetOptions(Args, CmdArgs, "", Action::OffloadKind::OFK_OpenMP);
+ TC.addClangTargetOptions(Args, CmdArgs, BoundArch, DeviceOffloadKind);
}
-void Flang::AddNVPTXTargetArgs(const ArgList &Args,
- ArgStringList &CmdArgs) const {
+void Flang::AddNVPTXTargetArgs(const ArgList &Args, ArgStringList &CmdArgs,
+ StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const {
// we cannot use addClangTargetOptions, as it appends unsupported args for
// flang: -fcuda-is-device, -fno-threadsafe-statics,
// -fcuda-allow-variadic-functions and -target-sdk-version Instead we manually
@@ -572,8 +574,9 @@ void Flang::AddNVPTXTargetArgs(const ArgList &Args,
CmdArgs.push_back(Args.MakeArgString(LibDeviceFile));
}
-void Flang::addTargetOptions(const ArgList &Args,
- ArgStringList &CmdArgs) const {
+void Flang::addTargetOptions(const ArgList &Args, ArgStringList &CmdArgs,
+ StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const {
const ToolChain &TC = getToolChain();
const llvm::Triple &Triple = TC.getEffectiveTriple();
const Driver &D = TC.getDriver();
@@ -599,11 +602,11 @@ void Flang::addTargetOptions(const ArgList &Args,
case llvm::Triple::r600:
case llvm::Triple::amdgcn:
getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
- AddAMDGPUTargetArgs(Args, CmdArgs);
+ AddAMDGPUTargetArgs(Args, CmdArgs, BoundArch, DeviceOffloadKind);
break;
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
- AddNVPTXTargetArgs(Args, CmdArgs);
+ AddNVPTXTargetArgs(Args, CmdArgs, BoundArch, DeviceOffloadKind);
break;
case llvm::Triple::riscv64:
getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
@@ -1111,7 +1114,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
addFloatingPointOptions(D, Args, CmdArgs);
// Add target args, features, etc.
- addTargetOptions(Args, CmdArgs);
+ addTargetOptions(Args, CmdArgs, JA.getOffloadingArch(),
+ JA.getOffloadingDeviceKind());
if (!TC.useIntegratedAs())
CmdArgs.push_back("-no-integrated-as");
diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h
index 62d2c6bb2a093..f08baa0fd5c12 100644
--- a/clang/lib/Driver/ToolChains/Flang.h
+++ b/clang/lib/Driver/ToolChains/Flang.h
@@ -61,8 +61,12 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
///
/// \param [in] Args The list of input driver arguments
/// \param [out] CmdArgs The list of output command arguments
+ /// \param [in] BoundArch The bound architecture for offload compilation
+ /// \param [in] DeviceOffloadKind The offload kind
void addTargetOptions(const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const;
+ llvm::opt::ArgStringList &CmdArgs,
+ llvm::StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const;
/// Add specific options for AArch64 target.
///
@@ -75,11 +79,17 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
///
/// \param [in] Args The list of input driver arguments
/// \param [out] CmdArgs The list of output command arguments
+ /// \param [in] BoundArch The bound architecture for offload compilation
+ /// \param [in] DeviceOffloadKind The offload kind
void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const;
+ llvm::opt::ArgStringList &CmdArgs,
+ llvm::StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const;
void AddNVPTXTargetArgs(const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const;
+ llvm::opt::ArgStringList &CmdArgs,
+ llvm::StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const;
/// Add specific options for LoongArch64 target.
///
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index 26ce048c4c69d..4adb84f353d25 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -282,7 +282,8 @@ void HIPAMDToolChain::addClangTargetOptions(
return; // No DeviceLibs for SPIR-V.
}
- for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) {
+ for (auto BCFile :
+ getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind)) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
CC1Args.push_back(DriverArgs.MakeArgStringRef(BCFile.Path));
@@ -371,7 +372,10 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs,
+ llvm::StringRef BoundArch,
Action::OffloadKind DeviceOffloadingKind) const {
+ assert(!BoundArch.empty() && "Must have an explicit GPU arch.");
+
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
const llvm::Triple &TT = getEffectiveTriple();
@@ -380,8 +384,8 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs,
TT.getEnvironment() == llvm::Triple::LLVM)
return {};
- AMDGPUToolChain::ParsedTargetIDType TargetID = getParsedTargetID(DriverArgs);
- if (!TargetID.OptionalTargetID || TargetID.OptionalTargetID == "amdgcnspirv")
+ StringRef GpuArch = getProcessorFromTargetID(getTriple(), BoundArch);
+ if (GpuArch == "amdgcnspirv")
return {};
ArgStringList LibraryPaths;
@@ -418,9 +422,8 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs,
}
// Add common device libraries like ocml etc.
- for (auto N : getCommonDeviceLibNames(
- DriverArgs, *TargetID.OptionalTargetID, *TargetID.OptionalGPUArch,
- DeviceOffloadingKind))
+ for (auto N : getCommonDeviceLibNames(DriverArgs, BoundArch, GpuArch,
+ DeviceOffloadingKind))
BCLibs.emplace_back(N);
// Add instrument lib.
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h
index e0b72eda0dd6c..8277119bf9348 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.h
+++ b/clang/lib/Driver/ToolChains/HIPAMD.h
@@ -83,7 +83,7 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain {
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args,
+ getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
VersionTuple
diff --git a/clang/lib/Driver/ToolChains/HIPSPV.cpp b/clang/lib/Driver/ToolChains/HIPSPV.cpp
index a4177a8a6665d..0d93e55137889 100644
--- a/clang/lib/Driver/ToolChains/HIPSPV.cpp
+++ b/clang/lib/Driver/ToolChains/HIPSPV.cpp
@@ -181,7 +181,7 @@ void HIPSPVToolChain::addClangTargetOptions(
{"-fvisibility=hidden", "-fapply-global-visibility-to-externs"});
for (const BitCodeLibraryInfo &BCFile :
- getDeviceLibs(DriverArgs, DeviceOffloadingKind))
+ getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind))
CC1Args.append(
{"-mlink-builtin-bitcode", DriverArgs.MakeArgString(BCFile.Path)});
}
@@ -243,7 +243,7 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPSPVToolChain::getDeviceLibs(
- const llvm::opt::ArgList &DriverArgs,
+ const llvm::opt::ArgList &DriverArgs, llvm::StringRef BoundArch,
const Action::OffloadKind DeviceOffloadingKind) const {
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs;
if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h b/clang/lib/Driver/ToolChains/HIPSPV.h
index f7f50e1f36688..8e2fd91a4b7ac 100644
--- a/clang/lib/Driver/ToolChains/HIPSPV.h
+++ b/clang/lib/Driver/ToolChains/HIPSPV.h
@@ -74,7 +74,7 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain {
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args,
+ getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch,
const Action::OffloadKind DeviceOffloadKind) const override;
SanitizerMask
More information about the cfe-commits
mailing list