[clang] [HIP] Fix `-flto` overriding `--no-lto` now that it is default (PR #202699)
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 9 09:18:44 PDT 2026
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/202699
>From 230a76e5fe5b54c88a1d9fc5fe2c9515e0bb13d6 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Tue, 9 Jun 2026 10:53:33 -0500
Subject: [PATCH] [HIP] Fix `-flto` overriding `--no-lto` now that it is
default
Summary:
The previous changes to LTO made the `-flto` flag be passed by default, which
overrode the hack we did to revert to the old non-LTO pipeline. This is a
temporary hack so I'm hacking it even further to fix it.
---
.../linker-wrapper-hip-no-rdc.c | 6 +++---
.../clang-linker-wrapper/linker-wrapper.c | 14 +++++++-------
.../clang-linker-wrapper/ClangLinkerWrapper.cpp | 17 ++++++-----------
3 files changed, 16 insertions(+), 21 deletions(-)
diff --git a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c
index 5c5b7b1eabfab..80ac493825aad 100644
--- a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c
+++ b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c
@@ -59,11 +59,11 @@ __attribute__((visibility("protected"), used)) int x;
// Without --no-lto the AMDGPU device compilation uses the LTO pipeline
// (-flto).
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --emit-fatbin-only --linker-path=/usr/bin/ld %t.out -o %t.lto.hipfb 2>&1 | FileCheck %s --check-prefix=LTO
-// LTO: clang{{.*}} -mcpu=gfx1200{{.*}} -flto
+// LTO: clang{{.*}} -mcpu=gfx1200
// With --no-lto the AMDGPU device compilation uses the conventional non-LTO
// pipeline: -flto must not be passed, and '-x ir' must be passed so Clang
// compiles the bitcode (stored in an object-extension file) instead of
// handing it to the LTO link.
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --no-lto --emit-fatbin-only --linker-path=/usr/bin/ld %t.out -o %t.nolto.hipfb 2>&1 | FileCheck %s --check-prefix=NO-LTO --implicit-check-not=-flto
-// NO-LTO: clang{{.*}} -mcpu=gfx1200{{.*}} -x ir
+// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --no-lto --emit-fatbin-only --linker-path=/usr/bin/ld %t.out -o %t.nolto.hipfb 2>&1 | FileCheck %s --check-prefix=NO-LTO
+// NO-LTO: clang{{.*}} -mcpu=gfx1200{{.*}} -x ir {{.*}}-flto=none
diff --git a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c
index 39b9bcd7425ab..a19a11e87afdb 100644
--- a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c
+++ b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c
@@ -40,7 +40,7 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK
-// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o {{.*}}.o
// RUN: llvm-offload-binary -o %t.out \
// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
@@ -49,7 +49,7 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-compiler=--save-temps \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS
-// AMDGPU-LTO-TEMPS: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -flto {{.*}}-save-temps
+// AMDGPU-LTO-TEMPS: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}-save-temps
// RUN: llvm-offload-binary -o %t.out \
// RUN: --image=file=%t.spirv.bc,kind=sycl,triple=spirv64-unknown-unknown,arch=generic
@@ -159,7 +159,7 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND
-// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o
+// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o
// RUN: llvm-offload-binary -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
@@ -182,8 +182,8 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID
-// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack+.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
-// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack-.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack+.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack-.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -Wl,--no-undefined {{.*}}.o {{.*}}.o
// RUN: llvm-offload-binary -o %t-lib.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic
@@ -198,8 +198,8 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL
-// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
-// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o {{.*}}.o
// RUN: llvm-offload-binary -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 6e4fc7060389c..cfdd11e1d298d 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -534,16 +534,6 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
Triple.isAMDGPU() ? CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch))
: CmdArgs.push_back(Args.MakeArgString("-march=" + Arch));
- // AMDGPU defaults to the LTO pipeline. Non-RDC HIP uses the conventional
- // non-LTO pipeline so device codegen still runs here, in parallel, instead
- // of being deferred to the LTO link.
- // FIXME: This is a stop-gap for non-RDC. Longer term, RDC and non-RDC should
- // share a unified interface so runtime libraries can be provided to non-RDC
- // compilations without relying on -mlink-builtin-bitcode.
- bool NonLTOAMDGPU = Triple.isAMDGPU() && Args.hasArg(OPT_no_lto);
- if (Triple.isAMDGPU() && !NonLTOAMDGPU)
- CmdArgs.push_back("-flto");
-
// Forward all of the `--offload-opt` and `-mllvm` options to the device.
for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm))
CmdArgs.append(
@@ -557,7 +547,9 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
// Force the IR input language so Clang runs the compile and backend phases
// instead of treating them as linker inputs, which would defer codegen to
// the LTO link and defeat the non-LTO pipeline.
- if (NonLTOAMDGPU)
+ // FIXME: This is a stop-gap for non-RDC. Longer term, RDC and non-RDC should
+ // share a unified interface.
+ if (Args.hasArg(OPT_no_lto))
CmdArgs.append({"-x", "ir"});
for (StringRef InputFile : InputFiles)
CmdArgs.push_back(InputFile);
@@ -621,6 +613,9 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ))
CmdArgs.push_back(Args.MakeArgString(Arg));
+ if (Args.hasArg(OPT_no_lto))
+ CmdArgs.append({"-flto=none", "-Wno-unused-command-line-argument"});
+
if (Error Err = executeCommands(*ClangPath, CmdArgs))
return std::move(Err);
More information about the cfe-commits
mailing list