[clang] 44e306e - [Clang] Correctly link and handle PGO options on the GPU (#185761)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 19 07:18:16 PDT 2026
Author: Joseph Huber
Date: 2026-03-19T09:18:10-05:00
New Revision: 44e306ecdb02bdf3fff067a39bf5a763c786a78d
URL: https://github.com/llvm/llvm-project/commit/44e306ecdb02bdf3fff067a39bf5a763c786a78d
DIFF: https://github.com/llvm/llvm-project/commit/44e306ecdb02bdf3fff067a39bf5a763c786a78d.diff
LOG: [Clang] Correctly link and handle PGO options on the GPU (#185761)
Summary:
Currently, the GPU targets ignore the standard profiling arguments. This
PR changes the behavior to use the standard handling, which links in
the now-present `libclang_rt.profile.a` if the user built with the
compiler-rt support enabled. If it is not present this is a linker error
and we can always suppress with `-Xarch_host` and `-Xarch_device`.
Hopefully this doesn't cause some people pain if they're used to doing
`-fprofile-generate` on a CPU unguarded since it was a strange mix of a
no-op and not a no-op on the GPU until now.
Added:
clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a
clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a
Modified:
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/lib/Driver/ToolChains/Cuda.cpp
clang/test/Driver/amdgpu-toolchain.c
clang/test/Driver/cuda-cross-compiling.c
clang/test/Driver/openmp-offload-gpu.c
Removed:
################################################################################
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 7bbdb71b1e24f..54fbd86168602 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -632,6 +632,8 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
Args.MakeArgString("-plugin-opt=-mattr=" + llvm::join(Features, ",")));
}
+ getToolChain().addProfileRTLibs(Args, CmdArgs);
+
if (Args.hasArg(options::OPT_stdlib))
CmdArgs.append({"-lc", "-lm"});
if (Args.hasArg(options::OPT_startfiles)) {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 3b852528d92c4..6416baf9126ff 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -9357,9 +9357,23 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
OPT_flto_partitions_EQ,
OPT_flto_EQ,
OPT_hipspv_pass_plugin_EQ,
- OPT_use_spirv_backend};
+ OPT_use_spirv_backend,
+ OPT_fprofile_generate,
+ OPT_fprofile_generate_EQ,
+ OPT_fprofile_instr_generate,
+ OPT_fprofile_instr_generate_EQ};
const llvm::DenseSet<unsigned> LinkerOptions{OPT_mllvm, OPT_Zlinker_input};
auto ShouldForwardForToolChain = [&](Arg *A, const ToolChain &TC) {
+ auto HasProfileRT = TC.getVFS().exists(
+ TC.getCompilerRT(Args, "profile", ToolChain::FT_Static));
+ // Don't forward profiling arguments if the toolchain doesn't support it.
+ // Without this check using it on the host would result in linker errors.
+ if (!HasProfileRT &&
+ (A->getOption().matches(OPT_fprofile_generate) ||
+ A->getOption().matches(OPT_fprofile_generate_EQ) ||
+ A->getOption().matches(OPT_fprofile_instr_generate) ||
+ A->getOption().matches(OPT_fprofile_instr_generate_EQ)))
+ return false;
// Don't forward -mllvm to toolchains that don't support LLVM.
return TC.HasNativeLLVMSupport() || A->getOption().getID() != OPT_mllvm;
};
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index e0020176800fd..2ca8886936f6c 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -643,6 +643,8 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
+ getToolChain().addProfileRTLibs(Args, CmdArgs);
+
if (Args.hasArg(options::OPT_stdlib))
CmdArgs.append({"-lc", "-lm"});
if (Args.hasArg(options::OPT_startfiles)) {
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c
index 459c1bdac246f..2a48ca6bb7670 100644
--- a/clang/test/Driver/amdgpu-toolchain.c
+++ b/clang/test/Driver/amdgpu-toolchain.c
@@ -46,3 +46,9 @@
// RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode 2>&1 \
// RUN: | FileCheck -check-prefix=DEVICE-LIBS %s
// DEVICE-LIBS: "-mlink-builtin-bitcode" "[[ROCM_PATH:.+]]ockl.bc"
+
+// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
+// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
+// RUN: -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s
+// PROFILE: ld.lld
+// PROFILE-SAME: "[[RESOURCE_DIR:.+]]{{/|\\\\}}lib{{/|\\\\}}amdgcn-amd-amdhsa{{/|\\\\}}libclang_rt.profile.a"
diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c
index ed2853cae3ccc..1dea9426f75ce 100644
--- a/clang/test/Driver/cuda-cross-compiling.c
+++ b/clang/test/Driver/cuda-cross-compiling.c
@@ -112,3 +112,9 @@
// RUN: -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=PATH %s
// PATH: clang-nvlink-wrapper{{.*}}"--cuda-path={{.*}}/Inputs/CUDA/usr/local/cuda"
+
+// RUN: %clang -### --target=nvptx64-nvidia-cuda -march=sm_89 -nogpulib \
+// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
+// RUN: -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s
+// PROFILE: clang-nvlink-wrapper
+// PROFILE-SAME: "[[RESOURCE_DIR:.+]]{{/|\\\\}}lib{{/|\\\\}}nvptx64-nvidia-cuda{{/|\\\\}}libclang_rt.profile.a"
diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c
index fb1bc9ffdbbd4..e057959d62044 100644
--- a/clang/test/Driver/openmp-offload-gpu.c
+++ b/clang/test/Driver/openmp-offload-gpu.c
@@ -410,3 +410,21 @@
// RUN: | FileCheck --check-prefix=SHOULD-EXTRACT %s
//
// SHOULD-EXTRACT: clang-linker-wrapper{{.*}}"--should-extract=gfx906"
+
+//
+// Check that `-fprofile-generate` flags are forwarded to link in the runtime
+// only if present in the resource directory.
+//
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
+// RUN: --offload-arch=gfx906 -fprofile-generate -nogpulib -nogpuinc %s 2>&1 \
+// RUN: | FileCheck --check-prefix=PROFILE %s
+//
+// PROFILE: clang-linker-wrapper{{.*}}--device-compiler=amdgcn-amd-amdhsa=-fprofile-generate
+//
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -resource-dir=%S/Inputs/resource_dir \
+// RUN: --offload-arch=gfx906 -fprofile-generate -nogpulib -nogpuinc %s 2>&1 \
+// RUN: | FileCheck --check-prefix=NO-PROFILE %s
+//
+// NO-PROFILE-NOT: --device-compiler=amdgcn-amd-amdhsa=-fprofile-generate
More information about the cfe-commits
mailing list