[clang] bec49b1 - [LinkerWrapper] Use `clang` to perform the device linking
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 19 10:47:58 PST 2023
Author: Joseph Huber
Date: 2023-01-19T12:47:51-06:00
New Revision: bec49b1d803ced901e14aca154ffc106939100e5
URL: https://github.com/llvm/llvm-project/commit/bec49b1d803ced901e14aca154ffc106939100e5
DIFF: https://github.com/llvm/llvm-project/commit/bec49b1d803ced901e14aca154ffc106939100e5.diff
LOG: [LinkerWrapper] Use `clang` to perform the device linking
Right now in the linker wrapper we manually invoke a lot of the
toolchain programs. This reproduces a lot of logic that is already
handled in clang. Since D140158 we can now target all supported
toolchains directly via cross-compilation.
This patch changes the linker wrapper to consolidate all the alternate
linking and assembler steps into a generic call to `clang` and let clang
take care of the argument handling. This heavily simplifies the interface.
Reviewed By: tra, JonChesterfield
Differential Revision: https://reviews.llvm.org/D142133
Added:
Modified:
clang/test/Driver/linker-wrapper.c
clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Removed:
################################################################################
diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c
index b052c077b92cd..d515e316da0f4 100644
--- a/clang/test/Driver/linker-wrapper.c
+++ b/clang/test/Driver/linker-wrapper.c
@@ -12,17 +12,11 @@
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK
-
-// NVPTX-LINK: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_70 {{.*}}.o {{.*}}.o
-
-// RUN: clang-offload-packager -o %t.out \
-// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm-bc -o %t.o -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HOST-BC
+// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK
-// HOST-BC: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_70 {{.*}}.o {{.*}}.o
+// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
@@ -31,16 +25,7 @@
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG
-// NVPTX-LINK-DEBUG: nvlink{{.*}}-m64 -g -o {{.*}}.img -arch sm_70 {{.*}}.o {{.*}}.o
-
-// RUN: clang-offload-packager -o %t.out \
-// RUN: --image=file=%t.nvptx.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN: --image=file=%t.nvptx.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O2 \
-// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG-LTO
-
-// NVPTX-LINK-DEBUG-LTO: ptxas{{.*}}-m64 -o {{.*}}.cubin -O2 --gpu-name sm_70 -lineinfo {{.*}}.s
+// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -Wl,--no-undefined -g {{.*}}.o {{.*}}.o
// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
@@ -49,7 +34,7 @@
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK
-// AMDGPU-LINK: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.img {{.*}}.o {{.*}}.o
+// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
@@ -58,17 +43,7 @@
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS
-// AMDGPU-LTO-TEMPS: clang{{.*}}-o [[OBJ:.+]] -fPIC -c --target=amdgcn-amd-amdhsa -O2 -mcpu=gfx1030 {{.*}}.s
-// AMDGPU-LTO-TEMPS: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx1030 -o {{.*}}.img {{.*}}.o
-
-// RUN: clang-offload-packager -o %t.out \
-// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
-// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK-LTO
-
-// AMDGPU-LINK-LTO: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.img {{.*}}.o
+// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined -save-temps {{.*}}.s
// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
@@ -77,7 +52,7 @@
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld.lld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU-LINK
-// CPU-LINK: ld.lld{{.*}}-m elf_x86_64 -shared -Bsymbolic -o {{.*}}.img {{.*}}.o {{.*}}.o
+// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -march=native -O2 -Wl,--no-undefined -Bsymbolic -shared {{.*}}.o {{.*}}.o
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -mllvm -openmp-opt-disable \
@@ -86,25 +61,6 @@
// HOST-LINK: ld.lld{{.*}}-a -b -c {{.*}}.o -o a.out
// HOST-LINK-NOT: ld.lld{{.*}}-abc
-// RUN: clang-offload-packager -o %t.out \
-// RUN: --image=file=%t.nvptx.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN: --image=file=%t.nvptx.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=LTO
-
-// LTO: ptxas{{.*}}-m64 -o {{.*}}.cubin -O2 --gpu-name sm_70 {{.*}}.s
-// LTO-NOT: nvlink
-
-// RUN: clang-offload-packager -o %t.out \
-// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-OMP-LINK
-
-// CUDA-OMP-LINK: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_70 {{.*}}.o {{.*}}.o
-
// RUN: clang-offload-packager -o %t-lib.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
// RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_52
@@ -116,8 +72,8 @@
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld -- %t.a %t-obj.o -o a.out 2>&1 | FileCheck %s --check-prefix=STATIC-LIBRARY
-// STATIC-LIBRARY: nvlink{{.*}} -arch sm_70
-// STATIC-LIBRARY-NOT: nvlink{{.*}} -arch sm_50
+// STATIC-LIBRARY: clang{{.*}} -march=sm_70
+// STATIC-LIBRARY-NOT: clang{{.*}} -march=sm_50
// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70 \
@@ -128,9 +84,9 @@
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA
-// CUDA: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_52 {{.*}}.o
-// CUDA: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_70 {{.*}}.o {{.*}}.o
-// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file={{.*}}.img --image=profile=sm_52,file={{.*}}.img
+// CUDA: clang{{.*}} -o [[IMG_SM52:.+]] --target=nvptx64-nvidia-cuda -march=sm_52
+// CUDA: clang{{.*}} -o [[IMG_SM70:.+]] --target=nvptx64-nvidia-cuda -march=sm_70
+// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file=[[IMG_SM70]] --image=profile=sm_52,file=[[IMG_SM52]]
// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_80 \
@@ -153,9 +109,9 @@
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP
-// HIP: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.img {{.*}}.o
-// HIP: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx90a -o {{.*}}.img {{.*}}.o
-// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input={{.*}}.img -input={{.*}}.img -output={{.*}}.hipfb
+// HIP: clang{{.*}} -o [[IMG_GFX908:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx908
+// HIP: clang{{.*}} -o [[IMG_GFX90A:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx90a
+// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input=[[IMG_GFX90A]] -input=[[IMG_GFX908]] -output={{.*}}.hipfb
// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
@@ -166,8 +122,8 @@
// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b -- \
// RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=LINKER-ARGS
-// LINKER-ARGS: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.img {{.*}}.o a
-// LINKER-ARGS: nvlink{{.*}}-m64 -o {{.*}}.img -arch sm_70 {{.*}}.o a b
+// LINKER-ARGS: clang{{.*}}--target=amdgcn-amd-amdhsa{{.*}}-Wl,a
+// LINKER-ARGS: clang{{.*}}--target=nvptx64-nvidia-cuda{{.*}}-Wl,a -Wl,b
// RUN: not clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -ldummy \
// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b -- \
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 1daa58f20fd5b..5cae73ef543eb 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -271,99 +271,13 @@ void printVersion(raw_ostream &OS) {
}
namespace nvptx {
-Expected<StringRef> assemble(StringRef InputFile, const ArgList &Args,
- bool RDC = true) {
- llvm::TimeTraceScope TimeScope("NVPTX Assembler");
- // NVPTX uses the ptxas binary to create device object files.
- Expected<std::string> PtxasPath = findProgram("ptxas", {CudaBinaryPath});
- if (!PtxasPath)
- return PtxasPath.takeError();
-
- const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
- StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
- // Create a new file to write the linked device image to. Assume that the
- // input filename already has the device and architecture.
- auto TempFileOrErr = createOutputFile(sys::path::stem(InputFile), "cubin");
- if (!TempFileOrErr)
- return TempFileOrErr.takeError();
-
- SmallVector<StringRef, 16> CmdArgs;
- StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2");
- CmdArgs.push_back(*PtxasPath);
- CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32");
- if (Verbose)
- CmdArgs.push_back("-v");
- for (StringRef Arg : Args.getAllArgValues(OPT_ptxas_arg))
- CmdArgs.push_back(Args.MakeArgString(Arg));
- CmdArgs.push_back("-o");
- CmdArgs.push_back(*TempFileOrErr);
- CmdArgs.push_back(Args.MakeArgString("-" + OptLevel));
- CmdArgs.push_back("--gpu-name");
- CmdArgs.push_back(Arch);
- if (Args.hasArg(OPT_debug) && OptLevel[1] == '0')
- CmdArgs.push_back("-g");
- else if (Args.hasArg(OPT_debug))
- CmdArgs.push_back("-lineinfo");
- if (RDC)
- CmdArgs.push_back("-c");
-
- CmdArgs.push_back(InputFile);
-
- if (Error Err = executeCommands(*PtxasPath, CmdArgs))
- return std::move(Err);
-
- return *TempFileOrErr;
-}
-
-Expected<StringRef> link(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
- llvm::TimeTraceScope TimeScope("NVPTX linker");
- // NVPTX uses the nvlink binary to link device object files.
- Expected<std::string> NvlinkPath = findProgram("nvlink", {CudaBinaryPath});
- if (!NvlinkPath)
- return NvlinkPath.takeError();
-
- const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
- StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
-
- // Create a new file to write the linked device image to.
- auto TempFileOrErr =
- createOutputFile(sys::path::filename(ExecutableName) + "." +
- Triple.getArchName() + "." + Arch,
- "img");
- if (!TempFileOrErr)
- return TempFileOrErr.takeError();
-
- SmallVector<StringRef, 16> CmdArgs;
- CmdArgs.push_back(*NvlinkPath);
- CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32");
- if (Args.hasArg(OPT_debug))
- CmdArgs.push_back("-g");
- if (Verbose)
- CmdArgs.push_back("-v");
- CmdArgs.push_back("-o");
- CmdArgs.push_back(*TempFileOrErr);
- CmdArgs.push_back("-arch");
- CmdArgs.push_back(Arch);
-
- // Add extracted input files.
- for (StringRef Input : InputFiles)
- CmdArgs.push_back(Input);
-
- for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
- CmdArgs.push_back(Args.MakeArgString(Arg));
- if (Error Err = executeCommands(*NvlinkPath, CmdArgs))
- return std::move(Err);
-
- return *TempFileOrErr;
-}
-
Expected<StringRef>
fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
const ArgList &Args) {
llvm::TimeTraceScope TimeScope("NVPTX fatbinary");
// NVPTX uses the fatbinary program to bundle the linked images.
Expected<std::string> FatBinaryPath =
- findProgram("fatbinary", {CudaBinaryPath});
+ findProgram("fatbinary", {CudaBinaryPath + "/bin"});
if (!FatBinaryPath)
return FatBinaryPath.takeError();
@@ -393,49 +307,6 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
} // namespace nvptx
namespace amdgcn {
-Expected<StringRef> link(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
- llvm::TimeTraceScope TimeScope("AMDGPU linker");
- // AMDGPU uses lld to link device object files.
- Expected<std::string> LLDPath =
- findProgram("lld", {getMainExecutable("lld")});
- if (!LLDPath)
- return LLDPath.takeError();
-
- const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
- StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
-
- // Create a new file to write the linked device image to.
- auto TempFileOrErr =
- createOutputFile(sys::path::filename(ExecutableName) + "." +
- Triple.getArchName() + "." + Arch,
- "img");
- if (!TempFileOrErr)
- return TempFileOrErr.takeError();
- std::string ArchArg = ("-plugin-opt=mcpu=" + Arch).str();
-
- SmallVector<StringRef, 16> CmdArgs;
- CmdArgs.push_back(*LLDPath);
- CmdArgs.push_back("-flavor");
- CmdArgs.push_back("gnu");
- CmdArgs.push_back("--no-undefined");
- CmdArgs.push_back("-shared");
- CmdArgs.push_back("-plugin-opt=-amdgpu-internalize-symbols");
- CmdArgs.push_back(ArchArg);
- CmdArgs.push_back("-o");
- CmdArgs.push_back(*TempFileOrErr);
-
- // Add extracted input files.
- for (StringRef Input : InputFiles)
- CmdArgs.push_back(Input);
-
- for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
- CmdArgs.push_back(Args.MakeArgString(Arg));
- if (Error Err = executeCommands(*LLDPath, CmdArgs))
- return std::move(Err);
-
- return *TempFileOrErr;
-}
-
Expected<StringRef>
fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
const ArgList &Args) {
@@ -483,35 +354,9 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
} // namespace amdgcn
namespace generic {
-
-const char *getLDMOption(const llvm::Triple &T) {
- switch (T.getArch()) {
- case llvm::Triple::x86:
- if (T.isOSIAMCU())
- return "elf_iamcu";
- return "elf_i386";
- case llvm::Triple::aarch64:
- return "aarch64linux";
- case llvm::Triple::aarch64_be:
- return "aarch64linuxb";
- case llvm::Triple::ppc64:
- return "elf64ppc";
- case llvm::Triple::ppc64le:
- return "elf64lppc";
- case llvm::Triple::x86_64:
- if (T.isX32())
- return "elf32_x86_64";
- return "elf_x86_64";
- case llvm::Triple::ve:
- return "elf64ve";
- default:
- return nullptr;
- }
-}
-
-Expected<StringRef> assemble(StringRef InputFile, const ArgList &Args) {
- llvm::TimeTraceScope TimeScope("Clang Assembler");
- // Use `clang` to invoke the generic assembler.
+Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
+ llvm::TimeTraceScope TimeScope("Clang");
+ // Use `clang` to invoke the appropriate device tools.
Expected<std::string> ClangPath =
findProgram("clang", {getMainExecutable("clang")});
if (!ClangPath)
@@ -519,9 +364,14 @@ Expected<StringRef> assemble(StringRef InputFile, const ArgList &Args) {
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
+ if (Arch.empty())
+ Arch = "native";
// Create a new file to write the linked device image to. Assume that the
// input filename already has the device and architecture.
- auto TempFileOrErr = createOutputFile(sys::path::stem(InputFile), "o");
+ auto TempFileOrErr =
+ createOutputFile(sys::path::filename(ExecutableName) + "." +
+ Triple.getArchName() + "." + Arch,
+ "img");
if (!TempFileOrErr)
return TempFileOrErr.takeError();
@@ -530,65 +380,47 @@ Expected<StringRef> assemble(StringRef InputFile, const ArgList &Args) {
*ClangPath,
"-o",
*TempFileOrErr,
- "-fPIC",
- "-c",
Args.MakeArgString("--target=" + Triple.getTriple()),
- Args.MakeArgString("-" + OptLevel),
Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch)
: Args.MakeArgString("-march=" + Arch),
- InputFile,
+ Args.MakeArgString("-" + OptLevel),
+ "-Wl,--no-undefined",
};
- if (Error Err = executeCommands(*ClangPath, CmdArgs))
- return std::move(Err);
+ // If this is CPU offloading we copy the input libraries.
+ if (!Triple.isAMDGPU() && !Triple.isNVPTX()) {
+ CmdArgs.push_back("-Bsymbolic");
+ CmdArgs.push_back("-shared");
+ ArgStringList LinkerArgs;
+ for (const opt::Arg *Arg :
+ Args.filtered(OPT_library, OPT_rpath, OPT_library_path))
+ Arg->render(Args, LinkerArgs);
+ llvm::copy(LinkerArgs, std::back_inserter(CmdArgs));
+ }
- return *TempFileOrErr;
-}
+ if (Args.hasArg(OPT_debug))
+ CmdArgs.push_back("-g");
-Expected<StringRef> link(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
- llvm::TimeTraceScope TimeScope("Generic linker");
- const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
- StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
+ if (SaveTemps)
+ CmdArgs.push_back("-save-temps");
- // Create a new file to write the linked device image to.
- auto TempFileOrErr =
- createOutputFile(sys::path::filename(ExecutableName) + "." +
- Triple.getArchName() + "." + Arch,
- "img");
- if (!TempFileOrErr)
- return TempFileOrErr.takeError();
+ if (Verbose)
+ CmdArgs.push_back("-v");
- // Use the host linker to perform generic offloading. Use the same libraries
- // and paths as the host application does.
- SmallVector<StringRef, 16> CmdArgs;
- CmdArgs.push_back(Args.getLastArgValue(OPT_linker_path_EQ));
- CmdArgs.push_back("-m");
- CmdArgs.push_back(getLDMOption(Triple));
- CmdArgs.push_back("-shared");
+ if (!CudaBinaryPath.empty())
+ CmdArgs.push_back(Args.MakeArgString("--cuda-path=" + CudaBinaryPath));
- ArgStringList LinkerArgs;
- for (const opt::Arg *Arg : Args) {
- auto Op = Arg->getOption();
- if (Op.matches(OPT_library) || Op.matches(OPT_library_path) ||
- Op.matches(OPT_as_needed) || Op.matches(OPT_no_as_needed) ||
- Op.matches(OPT_rpath) || Op.matches(OPT_dynamic_linker))
- Arg->render(Args, LinkerArgs);
- }
- for (StringRef Arg : LinkerArgs)
- CmdArgs.push_back(Arg);
+ for (StringRef Arg : Args.getAllArgValues(OPT_ptxas_arg))
+ llvm::copy(SmallVector<StringRef>({"-Xcuda-ptxas", Arg}),
+ std::back_inserter(CmdArgs));
- CmdArgs.push_back("-Bsymbolic");
- CmdArgs.push_back("-o");
- CmdArgs.push_back(*TempFileOrErr);
+ for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
+ CmdArgs.push_back(Args.MakeArgString("-Wl," + Arg));
- // Add extracted input files.
- for (StringRef Input : InputFiles)
- CmdArgs.push_back(Input);
+ for (StringRef InputFile : InputFiles)
+ CmdArgs.push_back(InputFile);
- for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
- CmdArgs.push_back(Args.MakeArgString(Arg));
- if (Error Err =
- executeCommands(Args.getLastArgValue(OPT_linker_path_EQ), CmdArgs))
+ if (Error Err = executeCommands(*ClangPath, CmdArgs))
return std::move(Err);
return *TempFileOrErr;
@@ -601,16 +433,14 @@ Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
switch (Triple.getArch()) {
case Triple::nvptx:
case Triple::nvptx64:
- return nvptx::link(InputFiles, Args);
case Triple::amdgcn:
- return amdgcn::link(InputFiles, Args);
case Triple::x86:
case Triple::x86_64:
case Triple::aarch64:
case Triple::aarch64_be:
case Triple::ppc64:
case Triple::ppc64le:
- return generic::link(InputFiles, Args);
+ return generic::clang(InputFiles, Args);
default:
return createStringError(inconvertibleErrorCode(),
Triple.getArchName() +
@@ -933,19 +763,6 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
return Error::success();
}
- // Is we are compiling for NVPTX we need to run the assembler first.
- if (Triple.isNVPTX() || SaveTemps) {
- for (StringRef &File : Files) {
-
- auto FileOrErr = Triple.isNVPTX()
- ? nvptx::assemble(File, Args, !SingleOutput)
- : generic::assemble(File, Args);
- if (!FileOrErr)
- return FileOrErr.takeError();
- File = *FileOrErr;
- }
- }
-
// Append the new inputs to the device linker input.
for (StringRef File : Files)
OutputFiles.push_back(File);
@@ -1226,12 +1043,9 @@ linkAndWrapDeviceFiles(SmallVectorImpl<OffloadFile> &LinkerInputFiles,
}
// Link the remaining device files using the device linker.
- llvm::Triple Triple(LinkerArgs.getLastArgValue(OPT_triple_EQ));
- bool RequiresLinking =
- !Args.hasArg(OPT_embed_bitcode) &&
- !(Input.empty() && InputFiles.size() == 1 && Triple.isNVPTX());
- auto OutputOrErr = RequiresLinking ? linkDevice(InputFiles, LinkerArgs)
- : InputFiles.front();
+ auto OutputOrErr = !Args.hasArg(OPT_embed_bitcode)
+ ? linkDevice(InputFiles, LinkerArgs)
+ : InputFiles.front();
if (!OutputOrErr)
return OutputOrErr.takeError();
@@ -1457,8 +1271,6 @@ int main(int Argc, char **Argv) {
SaveTemps = Args.hasArg(OPT_save_temps);
ExecutableName = Args.getLastArgValue(OPT_o, "a.out");
CudaBinaryPath = Args.getLastArgValue(OPT_cuda_path_EQ).str();
- if (!CudaBinaryPath.empty())
- CudaBinaryPath = CudaBinaryPath + "/bin";
parallel::strategy = hardware_concurrency(1);
if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) {
More information about the cfe-commits
mailing list