[clang] [flang] [clang] Add option to specify opt pipeline during offload lto (PR #114401)

Thu Oct 31 06:21:43 PDT 2024

https://github.com/macurtis-amd created https://github.com/llvm/llvm-project/pull/114401

Especially useful for experimenting with 'default' vs 'lto' pipelines.

New driver option '-offload-lto-opt-pipeline=<value>' is forwarded to clang-linker-wrapper as '--lto-opt-pipeline=<value>', which is then forwarded to clang as '-Xlinker --lto-newpm-passes=<value>', and finally to lld as '--lto-newpm-passes=<value>'.

>From 2c4ddda58ee35141a1b59e92f1894d76b7bac43e Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macurtis at amd.com>
Date: Thu, 31 Oct 2024 08:06:29 -0500
Subject: [PATCH] [clang] Add option to specify opt pipeline during offload lto

Especially useful for experimenting with 'default' vs 'lto' pipelines.

New driver option '-offload-lto-opt-pipeline=<value>' is forwarded to
clang-linker-wrapper as '--lto-opt-pipeline=<value>', which is then forwarded
to clang as '-Xlinker --lto-newpm-passes=<value>', and finally to lld as
'--lto-newpm-passes=<value>'.
---
 clang/include/clang/Driver/Options.td         |  4 +++
 clang/lib/Driver/ToolChains/Clang.cpp         |  6 ++++
 clang/test/Driver/amdgpu-openmp-toolchain.c   | 18 ++++++++++
 clang/test/Driver/linker-wrapper.c            | 34 +++++++++++++++++--
 .../ClangLinkerWrapper.cpp                    | 12 +++++++
 .../clang-linker-wrapper/LinkerWrapperOpts.td |  4 +++
 flang/test/Driver/offload-lto-pipeline.f90    | 20 +++++++++++
 7 files changed, 96 insertions(+), 2 deletions(-)
 create mode 100644 flang/test/Driver/offload-lto-pipeline.f90

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2b9ee1a0e669ed..c78eb23e134e04 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1231,6 +1231,10 @@ def offload_host_device : Flag<["--"], "offload-host-device">,
   Visibility<[ClangOption, FlangOption]>,
   HelpText<"Compile for both the offloading host and device (default).">;
 
+def offload_lto_opt_pipeline_EQ : Joined<["-"], "offload-lto-opt-pipeline=">,
+  Visibility<[ClangOption, FlangOption]>, Flags<[HelpHidden]>,
+  HelpText<"Optimization pipeline to use during offload linking.">;
+
 def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
   InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "
                               "'-aux-target-cpu' and '-aux-target-feature'.">;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 4c6f508f1f24a6..128b9f29358bf4 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -9228,6 +9228,12 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.getLastArg(options::OPT_save_temps_EQ))
     CmdArgs.push_back("--save-temps");
 
+  if (const Arg *A =
+          Args.getLastArg(options::OPT_offload_lto_opt_pipeline_EQ)) {
+    CmdArgs.push_back(
+        Args.MakeArgString(Twine("--lto-opt-pipeline=") + A->getValue()));
+  }
+
   // Construct the link job so we can wrap around it.
   Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput);
   const auto &LinkCommand = C.getJobs().getJobs().back();
diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c
index f596708047c154..57aedb35024ac1 100644
--- a/clang/test/Driver/amdgpu-openmp-toolchain.c
+++ b/clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -81,3 +81,21 @@
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp --offload-arch=gfx803 \
 // RUN:     -stdlib=libc++ -nogpulib %s 2>&1 | FileCheck %s --check-prefix=LIBCXX
 // LIBCXX-NOT: include/amdgcn-amd-amdhsa/c++/v1
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp \
+// RUN:     -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa \
+// RUN:     -march=gfx803 -nogpulib %s \
+// RUN:     2>&1 | FileCheck %s --check-prefix=CHECK-LTO-OPT-PL-00
+// CHECK-LTO-OPT-PL-00-NOT: clang-linker-wrapper{{.*}} "--lto-opt-pipeline"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp \
+// RUN:     -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa \
+// RUN:     -march=gfx803 -nogpulib -offload-lto-opt-pipeline=lto %s \
+// RUN:     2>&1 | FileCheck %s --check-prefix=CHECK-LTO-OPT-PL-01
+// CHECK-LTO-OPT-PL-01: clang-linker-wrapper{{.*}} "--lto-opt-pipeline=lto"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp \
+// RUN:     -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa \
+// RUN:     -march=gfx803 -nogpulib "-offload-lto-opt-pipeline=default<O3>" %s \
+// RUN:     2>&1 | FileCheck %s --check-prefix=CHECK-LTO-OPT-PL-02
+// CHECK-LTO-OPT-PL-02: clang-linker-wrapper{{.*}} "--lto-opt-pipeline=default<O3>"
diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c
index 470af4d5d70cac..da4a3fe31afd07 100644
--- a/clang/test/Driver/linker-wrapper.c
+++ b/clang/test/Driver/linker-wrapper.c
@@ -30,7 +30,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG
 
-// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o -g 
+// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o -g
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
@@ -93,7 +93,7 @@ __attribute__((visibility("protected"), used)) int x;
 
 // CUDA: clang{{.*}} -o [[IMG_SM70:.+]] --target=nvptx64-nvidia-cuda -march=sm_70
 // CUDA: clang{{.*}} -o [[IMG_SM52:.+]] --target=nvptx64-nvidia-cuda -march=sm_52
-// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file=[[IMG_SM70]] --image=profile=sm_52,file=[[IMG_SM52]] 
+// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file=[[IMG_SM70]] --image=profile=sm_52,file=[[IMG_SM52]]
 // CUDA: usr/bin/ld{{.*}} {{.*}}.openmp.image.{{.*}}.o {{.*}}.cuda.image.{{.*}}.o
 
 // RUN: clang-offload-packager -o %t.out \
@@ -254,3 +254,33 @@ __attribute__((visibility("protected"), used)) int x;
 // Error handling when --linker-path is not provided for clang-linker-wrapper
 // RUN: not clang-linker-wrapper 2>&1 | FileCheck --check-prefix=LINKER-PATH-NOT-PROVIDED %s
 // LINKER-PATH-NOT-PROVIDED: linker path missing, must pass 'linker-path'
+
+// RUN: clang-linker-wrapper --lto-opt-pipeline=default \
+// RUN:   --dry-run --wrapper-verbose --host-triple=x86_64-unknown-linux-gnu \
+// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out \
+// RUN:   2>&1 | FileCheck %s --check-prefix=LTO-OPT-PL-00
+// LTO-OPT-PL-00: "{{.*}}clang" {{.*}} -Xlinker --lto-newpm-passes=default<O2>
+
+// RUN: clang-linker-wrapper --lto-opt-pipeline=default --opt-level=O3 \
+// RUN:   --dry-run --wrapper-verbose --host-triple=x86_64-unknown-linux-gnu \
+// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out \
+// RUN:   2>&1 | FileCheck %s --check-prefix=LTO-OPT-PL-01
+// LTO-OPT-PL-01: "{{.*}}clang" {{.*}} -Xlinker --lto-newpm-passes=default<O3>
+
+// RUN: clang-linker-wrapper --lto-opt-pipeline=lto \
+// RUN:   --dry-run --wrapper-verbose --host-triple=x86_64-unknown-linux-gnu \
+// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out \
+// RUN:   2>&1 | FileCheck %s --check-prefix=LTO-OPT-PL-02
+// LTO-OPT-PL-02: "{{.*}}clang" {{.*}} -Xlinker --lto-newpm-passes=lto<O2>
+
+// RUN: clang-linker-wrapper --lto-opt-pipeline=lto --opt-level=O0 \
+// RUN:   --dry-run --wrapper-verbose --host-triple=x86_64-unknown-linux-gnu \
+// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out \
+// RUN:   2>&1 | FileCheck %s --check-prefix=LTO-OPT-PL-03
+// LTO-OPT-PL-03: "{{.*}}clang" {{.*}} -Xlinker --lto-newpm-passes=lto<O0>
+
+// RUN: clang-linker-wrapper \
+// RUN:   --dry-run --wrapper-verbose --host-triple=x86_64-unknown-linux-gnu \
+// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out \
+// RUN:   2>&1 | FileCheck %s --check-prefix=LTO-OPT-PL-04
+// LTO-OPT-PL-04-NOT: --lto-newpm-passes
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index ebafd7eb7774ec..10b82a533aaddd 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -570,6 +570,18 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
 
   for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
     CmdArgs.append({"-Xlinker", Args.MakeArgString(Arg)});
+
+  StringRef LTOOptPipeline = Args.getLastArgValue(OPT_lto_opt_pipeline_EQ, "");
+  if (LTOOptPipeline == "default" || LTOOptPipeline == "lto" ||
+      LTOOptPipeline == "thinlto") {
+    // for convenience, add "<On>"
+    LTOOptPipeline = Args.MakeArgString(LTOOptPipeline + "<" + OptLevel + ">");
+  }
+  if (LTOOptPipeline.size()) {
+    CmdArgs.append({"-Xlinker", Args.MakeArgString("--lto-newpm-passes=" +
+                                                   LTOOptPipeline)});
+  }
+
   for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ))
     CmdArgs.push_back(Args.MakeArgString(Arg));
 
diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
index 57d918db0a73ce..acd50067d98dc6 100644
--- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -71,6 +71,10 @@ def override_image : Joined<["--"], "override-image=">,
   Flags<[WrapperOnlyOption]>, MetaVarName<"<kind=file>">,
   HelpText<"Uses the provided file as if it were the output of the device link step">;
 
+def lto_opt_pipeline_EQ : Joined<["--"], "lto-opt-pipeline=">,
+  Flags<[WrapperOnlyOption]>,
+  HelpText<"Optimization pipeline to use during LTO.">;
+
 // Flags passed to the device linker.
 def arch_EQ : Joined<["--"], "arch=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"<arch>">,
diff --git a/flang/test/Driver/offload-lto-pipeline.f90 b/flang/test/Driver/offload-lto-pipeline.f90
new file mode 100644
index 00000000000000..c81762307820ab
--- /dev/null
+++ b/flang/test/Driver/offload-lto-pipeline.f90
@@ -0,0 +1,20 @@
+! Test forwarding/generation of --lto-opt-pipeline to the clang-linker-wrapper
+  
+! RUN: %flang -### %s -o %t 2>&1 -fopenmp --offload-arch=gfx90a \
+! RUN:   --target=aarch64-unknown-linux-gnu -nogpulib \
+! RUN:   | FileCheck %s --check-prefix=CHECK-LTO-OPT-PL-00
+! CHECK-LTO-OPT-PL-00-NOT: clang-linker-wrapper{{.*}} "--lto-opt-pipeline"
+
+! RUN: %flang -### %s -o %t 2>&1 -fopenmp --offload-arch=gfx90a \
+! RUN:   --target=aarch64-unknown-linux-gnu -nogpulib \
+! RUN:   -offload-lto-opt-pipeline=lto \
+! RUN:   | FileCheck %s --check-prefix=CHECK-LTO-OPT-PL-01
+! CHECK-LTO-OPT-PL-01: clang-linker-wrapper{{.*}} "--lto-opt-pipeline=lto"
+
+! RUN: %flang -### %s -o %t 2>&1 -fopenmp --offload-arch=gfx90a \
+! RUN:   --target=aarch64-unknown-linux-gnu -nogpulib \
+! RUN:   "-offload-lto-opt-pipeline=default<O3>" \
+! RUN:   | FileCheck %s --check-prefix=CHECK-LTO-OPT-PL-02
+! CHECK-LTO-OPT-PL-02: clang-linker-wrapper{{.*}} "--lto-opt-pipeline=default<O3>"
+
+