[clang] [Clang] Handle `-flto-partitions` generically and forward it properly (PR #133283)

via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 27 10:27:48 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-driver

Author: Joseph Huber (jhuber6)

<details>
<summary>Changes</summary>

Summary:
The https://github.com/llvm/llvm-project/pull/128509 patch introduced
`--flto-partitions`. This was marked as a HIP only argument, and was
also spelled and handled incorrectly for an `-f` option. This patch
makes the handling generic for `ld.lld` consumers.

This also fixes some issues with emitting the flags being put after the
default arguments, preventing users from overriding them. Also, forwards
things properly for the new driver so we can test this.


---
Full diff: https://github.com/llvm/llvm-project/pull/133283.diff


9 Files Affected:

- (modified) clang/include/clang/Driver/Options.td (+2-2) 
- (modified) clang/lib/Driver/ToolChains/AMDGPU.cpp (+4-27) 
- (modified) clang/lib/Driver/ToolChains/Clang.cpp (+3-1) 
- (modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+11) 
- (modified) clang/lib/Driver/ToolChains/HIPAMD.cpp (+4-2) 
- (modified) clang/test/Driver/amdgpu-toolchain.c (+3-3) 
- (modified) clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip (+6-6) 
- (modified) clang/test/Driver/hip-toolchain-rdc-static-lib.hip (+2-2) 
- (modified) clang/test/Driver/hip-toolchain-rdc.hip (+15-11) 


``````````diff
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index a7fcb160d3867..b43c53f5f419d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1392,8 +1392,6 @@ def fhip_emit_relocatable : Flag<["-"], "fhip-emit-relocatable">,
   HelpText<"Compile HIP source to relocatable">;
 def fno_hip_emit_relocatable : Flag<["-"], "fno-hip-emit-relocatable">,
   HelpText<"Do not override toolchain to compile HIP source to relocatable">;
-def flto_partitions_EQ : Joined<["--"], "flto-partitions=">, Group<hip_Group>,
-  HelpText<"Number of partitions to use for parallel full LTO codegen. Use 1 to disable partitioning.">;
 }
 
 // Clang specific/exclusive options for OpenACC.
@@ -3043,6 +3041,8 @@ defm fat_lto_objects : BoolFOption<"fat-lto-objects",
   PosFlag<SetTrue, [], [ClangOption, CC1Option], "Enable">,
   NegFlag<SetFalse, [], [ClangOption, CC1Option], "Disable">,
   BothFlags<[], [ClangOption, CC1Option], " fat LTO object support">>;
+def flto_partitions_EQ : Joined<["-"], "flto-partitions=">, Group<f_Group>,
+  HelpText<"Number of partitions to use for parallel full LTO codegen, ld.lld only.">;
 def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">,
   Group<f_Group>, Visibility<[ClangOption, CC1Option, CLOption]>,
   HelpText<"Set the maximum number of entries to print in a macro expansion backtrace (0 = no limit)">,
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 1c5bb08568801..72c03fb3154e2 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -625,22 +625,19 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-shared");
   }
 
-  addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
-  Args.AddAllArgs(CmdArgs, options::OPT_L);
-  getToolChain().AddFilePathLibArgs(Args, CmdArgs);
-  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
   if (C.getDriver().isUsingLTO()) {
     const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin);
     addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], ThinLTO);
-
-    if (!ThinLTO && JA.getOffloadingDeviceKind() == Action::OFK_HIP)
-      addFullLTOPartitionOption(C.getDriver(), Args, CmdArgs);
   } else if (Args.hasArg(options::OPT_mcpu_EQ)) {
     CmdArgs.push_back(Args.MakeArgString(
         "-plugin-opt=mcpu=" +
         getProcessorFromTargetID(getToolChain().getTriple(),
                                  Args.getLastArgValue(options::OPT_mcpu_EQ))));
   }
+  addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
+  getToolChain().AddFilePathLibArgs(Args, CmdArgs);
+  Args.AddAllArgs(CmdArgs, options::OPT_L);
+  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
 
   // Always pass the target-id features to the LTO job.
   std::vector<StringRef> Features;
@@ -711,26 +708,6 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
                             options::OPT_m_amdgpu_Features_Group);
 }
 
-static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
-  int Value = 0;
-  StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
-  if (A.getAsInteger(10, Value) || (Value < 1)) {
-    Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
-    D.Diag(diag::err_drv_invalid_int_value)
-        << Arg->getAsString(Args) << Arg->getValue();
-    return 1;
-  }
-
-  return Value;
-}
-
-void amdgpu::addFullLTOPartitionOption(const Driver &D,
-                                       const llvm::opt::ArgList &Args,
-                                       llvm::opt::ArgStringList &CmdArgs) {
-  CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" +
-                                       Twine(getFullLTOPartitions(D, Args))));
-}
-
 /// AMDGPU Toolchain
 AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
                                  const ArgList &Args)
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index e1e8f57dd6455..40ecc0aee48b3 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -9217,6 +9217,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
       OPT_load,
       OPT_fno_lto,
       OPT_flto,
+      OPT_flto_partitions_EQ,
       OPT_flto_EQ};
   const llvm::DenseSet<unsigned> LinkerOptions{OPT_mllvm, OPT_Zlinker_input};
   auto ShouldForward = [&](const llvm::DenseSet<unsigned> &Set, Arg *A) {
@@ -9226,7 +9227,8 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
   };
 
   ArgStringList CmdArgs;
-  for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
+  for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP,
+                                   Action::OFK_HIP, Action::OFK_SYCL}) {
     auto TCRange = C.getOffloadToolChains(Kind);
     for (auto &I : llvm::make_range(TCRange)) {
       const ToolChain *TC = I.second;
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 1945c95458c54..41cfa3d2e4f8c 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -899,6 +899,17 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
     // files
     if (IsFatLTO)
       CmdArgs.push_back("--fat-lto-objects");
+
+    if (Args.hasArg(options::OPT_flto_partitions_EQ)) {
+      int Value = 0;
+      StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
+      if (A.getAsInteger(10, Value) || (Value < 1)) {
+        Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
+        D.Diag(diag::err_drv_invalid_int_value)
+            << Arg->getAsString(Args) << Arg->getValue();
+      }
+      CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" + A));
+    }
   }
 
   const char *PluginOptPrefix = IsOSAIX ? "-bplugin_opt:" : "-plugin-opt=";
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index dc3300b00f9ff..abb83701759ce 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -116,8 +116,6 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
 
   addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
 
-  amdgpu::addFullLTOPartitionOption(D, Args, LldArgs);
-
   // Given that host and device linking happen in separate processes, the device
   // linker doesn't always have the visibility as to which device symbols are
   // needed by a program, especially for the device symbol dependencies that are
@@ -294,6 +292,10 @@ HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
     checkTargetID(*DAL);
   }
 
+  if (!Args.hasArg(options::OPT_flto_partitions_EQ))
+    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_flto_partitions_EQ),
+                      "8");
+
   return DAL;
 }
 
diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c
index 20656f1fadeb7..2a35c13746a0e 100644
--- a/clang/test/Driver/amdgpu-toolchain.c
+++ b/clang/test/Driver/amdgpu-toolchain.c
@@ -20,12 +20,12 @@
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
 // RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
-// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
+// LTO: clang{{.*}}"-flto=full"{{.*}}"-fconvergent-functions"
+// LTO: ld.lld{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"{{.*}}
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
 // RUN:   -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
-// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
+// MCPU: ld.lld{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"{{.*}}
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
 // RUN:   -fuse-ld=ld %s 2>&1 | FileCheck -check-prefixes=LD %s
diff --git a/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip b/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip
index e345bd3f5be6b..4439547ea8ad9 100644
--- a/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip
+++ b/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip
@@ -1,5 +1,5 @@
 // RUN: %clang -### --target=x86_64-linux-gnu \
-// RUN:   -x hip --cuda-gpu-arch=gfx803 --flto-partitions=42 \
+// RUN:   -x hip --cuda-gpu-arch=gfx803 -flto-partitions=42 \
 // RUN:   --no-offload-new-driver --emit-static-lib -nogpulib \
 // RUN:   -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
@@ -10,26 +10,26 @@
 // FIXED-PARTS-NOT: ".*opt"
 // FIXED-PARTS-NOT: ".*llc"
 // FIXED-PARTS: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
-// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803"
 // FIXED-PARTS-SAME: "--lto-partitions=42"
+// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803"
 // FIXED-PARTS-SAME: "-o" "{{.*out}}" "{{.*bc}}"
 
 // RUN: not %clang -### --target=x86_64-linux-gnu \
-// RUN:   -x hip --cuda-gpu-arch=gfx803 --flto-partitions=a \
+// RUN:   -x hip --cuda-gpu-arch=gfx803 -flto-partitions=a \
 // RUN:   --no-offload-new-driver --emit-static-lib -nogpulib \
 // RUN:   -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV0
 
-// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
+// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a'
 
 // RUN: not %clang -### --target=x86_64-linux-gnu \
-// RUN:   -x hip --cuda-gpu-arch=gfx803 --flto-partitions=0 \
+// RUN:   -x hip --cuda-gpu-arch=gfx803 -flto-partitions=0 \
 // RUN:   --no-offload-new-driver --emit-static-lib -nogpulib \
 // RUN:   -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV1
 
-// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'
+// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0'
diff --git a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip
index 6f38a06f7cf31..05d276ba57bda 100644
--- a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip
+++ b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip
@@ -48,8 +48,8 @@
 // CHECK-NOT: ".*opt"
 // CHECK-NOT: ".*llc"
 // CHECK: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
-// CHECK-SAME: "-plugin-opt=mcpu=gfx803"
 // CHECK-SAME: "--lto-partitions={{[0-9]+}}"
+// CHECK-SAME: "-plugin-opt=mcpu=gfx803"
 // CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[A_BC1]] [[B_BC1]]
 
 // generate image for device side path on gfx900
@@ -77,8 +77,8 @@
 // CHECK-NOT: ".*opt"
 // CHECK-NOT: ".*llc"
 // CHECK: [[LLD]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
-// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
 // CHECK-SAME: "--lto-partitions={{[0-9]+}}"
+// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
 // CHECK-SAME: "--whole-archive"
 // CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[A_BC2]] [[B_BC2]]
 // CHECK-SAME: "--no-whole-archive"
diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip
index d5119e71a3569..204400aeaa15d 100644
--- a/clang/test/Driver/hip-toolchain-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-rdc.hip
@@ -146,8 +146,8 @@
 // CHECK-NOT: ".*opt"
 // CHECK-NOT: ".*llc"
 // CHECK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
-// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
 // CHECK-SAME: "--lto-partitions={{[0-9]+}}"
+// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
 // CHECK-SAME: "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]]
 
 // combine images generated into hip fat binary object
@@ -162,20 +162,24 @@
 // LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
 // MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
 
-// Check --flto-partitions
+// Check -flto-partitions
 
-// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
+// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
 // RUN:   -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT %s
 // LTO_DEFAULT: lld{{.*}}"--lto-partitions=8"
 
-// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
-// RUN:   -L. -foffload-lto --flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
+// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --offload-new-driver \
+// RUN:   -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT_NEW %s
+// LTO_DEFAULT_NEW: clang-linker-wrapper{{.*}}"--device-compiler=amdgcn-amd-amdhsa=-flto-partitions=8"
+
+// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
+// RUN:   -L. -foffload-lto -flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
 // LTO_PARTS: lld{{.*}}"--lto-partitions=42"
 
-// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
-// RUN:   -L. -foffload-lto --flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
-// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
+// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
+// RUN:   -L. -foffload-lto -flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
+// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a'
 
-// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
-// RUN:   -L. -foffload-lto --flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
-// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'
+// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
+// RUN:   -L. -foffload-lto -flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
+// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0'

``````````

</details>


https://github.com/llvm/llvm-project/pull/133283


More information about the cfe-commits mailing list