r318763 - [OpenMP] Consistently use cubin extension for nvlink
Jonas Hahnfeld via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 21 06:44:45 PST 2017
Author: hahnfeld
Date: Tue Nov 21 06:44:45 2017
New Revision: 318763
URL: http://llvm.org/viewvc/llvm-project?rev=318763&view=rev
Log:
[OpenMP] Consistently use cubin extension for nvlink
This was previously done in some places, but for example not for
bundling so that single object compilation with -c failed. In
addition cubin was used for all file types during unbundling which
is incorrect for assembly files that are passed to ptxas.
Tighten up the tests so that we can't regress in that area.
Differential Revision: https://reviews.llvm.org/D40250
Modified:
cfe/trunk/include/clang/Driver/ToolChain.h
cfe/trunk/lib/Driver/ToolChain.cpp
cfe/trunk/lib/Driver/ToolChains/Clang.cpp
cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
cfe/trunk/lib/Driver/ToolChains/Cuda.h
cfe/trunk/test/Driver/openmp-offload-gpu.c
Modified: cfe/trunk/include/clang/Driver/ToolChain.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/ToolChain.h?rev=318763&r1=318762&r2=318763&view=diff
==============================================================================
--- cfe/trunk/include/clang/Driver/ToolChain.h (original)
+++ cfe/trunk/include/clang/Driver/ToolChain.h Tue Nov 21 06:44:45 2017
@@ -40,6 +40,7 @@ namespace driver {
class Compilation;
class CudaInstallationDetector;
class Driver;
+ class InputInfo;
class JobAction;
class RegisterEffectiveTriple;
class SanitizerArgs;
@@ -172,6 +173,11 @@ public:
/// while the aux triple is the host (CPU) toolchain, e.g. x86-linux-gnu.
virtual const llvm::Triple *getAuxTriple() const { return nullptr; }
+ /// Some toolchains need to modify the file name, for example to replace the
+ /// extension for object files with .cubin for OpenMP offloading to Nvidia
+ /// GPUs.
+ virtual std::string getInputFilename(const InputInfo &Input) const;
+
llvm::Triple::ArchType getArch() const { return Triple.getArch(); }
StringRef getArchName() const { return Triple.getArchName(); }
StringRef getPlatform() const { return Triple.getVendorName(); }
Modified: cfe/trunk/lib/Driver/ToolChain.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChain.cpp?rev=318763&r1=318762&r2=318763&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/ToolChain.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChain.cpp Tue Nov 21 06:44:45 2017
@@ -215,6 +215,10 @@ StringRef ToolChain::getDefaultUniversal
}
}
+std::string ToolChain::getInputFilename(const InputInfo &Input) const {
+ return Input.getFilename();
+}
+
bool ToolChain::IsUnwindTablesDefault(const ArgList &Args) const {
return false;
}
Modified: cfe/trunk/lib/Driver/ToolChains/Clang.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Clang.cpp?rev=318763&r1=318762&r2=318763&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/ToolChains/Clang.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp Tue Nov 21 06:44:45 2017
@@ -5310,12 +5310,15 @@ void OffloadBundler::ConstructJob(Compil
if (I)
Triples += ',';
+ // Find ToolChain for this input.
Action::OffloadKind CurKind = Action::OFK_Host;
const ToolChain *CurTC = &getToolChain();
const Action *CurDep = JA.getInputs()[I];
if (const auto *OA = dyn_cast<OffloadAction>(CurDep)) {
+ CurTC = nullptr;
OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) {
+ assert(CurTC == nullptr && "Expected one dependence!");
CurKind = A->getOffloadingDeviceKind();
CurTC = TC;
});
@@ -5336,7 +5339,17 @@ void OffloadBundler::ConstructJob(Compil
for (unsigned I = 0; I < Inputs.size(); ++I) {
if (I)
UB += ',';
- UB += Inputs[I].getFilename();
+
+ // Find ToolChain for this input.
+ const ToolChain *CurTC = &getToolChain();
+ if (const auto *OA = dyn_cast<OffloadAction>(JA.getInputs()[I])) {
+ CurTC = nullptr;
+ OA->doOnEachDependence([&](Action *, const ToolChain *TC, const char *) {
+ assert(CurTC == nullptr && "Expected one dependence!");
+ CurTC = TC;
+ });
+ }
+ UB += CurTC->getInputFilename(Inputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
@@ -5396,13 +5409,7 @@ void OffloadBundler::ConstructJobMultipl
for (unsigned I = 0; I < Outputs.size(); ++I) {
if (I)
UB += ',';
- SmallString<256> OutputFileName(Outputs[I].getFilename());
- // Change extension of target files for OpenMP offloading
- // to NVIDIA GPUs.
- if (DepInfo[I].DependentToolChain->getTriple().isNVPTX() &&
- JA.isOffloading(Action::OFK_OpenMP))
- llvm::sys::path::replace_extension(OutputFileName, "cubin");
- UB += OutputFileName;
+ UB += DepInfo[I].DependentToolChain->getInputFilename(Outputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
CmdArgs.push_back("-unbundle");
Modified: cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Cuda.cpp?rev=318763&r1=318762&r2=318763&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/ToolChains/Cuda.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/Cuda.cpp Tue Nov 21 06:44:45 2017
@@ -301,10 +301,7 @@ void NVPTX::Assembler::ConstructJob(Comp
CmdArgs.push_back("--gpu-name");
CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
CmdArgs.push_back("--output-file");
- SmallString<256> OutputFileName(Output.getFilename());
- if (JA.isOffloading(Action::OFK_OpenMP))
- llvm::sys::path::replace_extension(OutputFileName, "cubin");
- CmdArgs.push_back(Args.MakeArgString(OutputFileName));
+ CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
for (const auto& II : Inputs)
CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
@@ -431,11 +428,8 @@ void NVPTX::OpenMPLinker::ConstructJob(C
if (!II.isFilename())
continue;
- SmallString<256> Name(II.getFilename());
- llvm::sys::path::replace_extension(Name, "cubin");
-
- const char *CubinF =
- C.addTempFile(C.getArgs().MakeArgString(Name));
+ const char *CubinF = C.addTempFile(
+ C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
CmdArgs.push_back(CubinF);
}
@@ -463,6 +457,20 @@ CudaToolChain::CudaToolChain(const Drive
getProgramPaths().push_back(getDriver().Dir);
}
+std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
+ // Only object files are changed, for example assembly files keep their .s
+ // extensions. CUDA also continues to use .o as they don't use nvlink but
+ // fatbinary.
+ if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
+ return ToolChain::getInputFilename(Input);
+
+ // Replace extension for object files with cubin because nvlink relies on
+ // these particular file names.
+ SmallString<256> Filename(ToolChain::getInputFilename(Input));
+ llvm::sys::path::replace_extension(Filename, "cubin");
+ return Filename.str();
+}
+
void CudaToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Modified: cfe/trunk/lib/Driver/ToolChains/Cuda.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Cuda.h?rev=318763&r1=318762&r2=318763&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/ToolChains/Cuda.h (original)
+++ cfe/trunk/lib/Driver/ToolChains/Cuda.h Tue Nov 21 06:44:45 2017
@@ -137,10 +137,12 @@ public:
const ToolChain &HostTC, const llvm::opt::ArgList &Args,
const Action::OffloadKind OK);
- virtual const llvm::Triple *getAuxTriple() const override {
+ const llvm::Triple *getAuxTriple() const override {
return &HostTC.getTriple();
}
+ std::string getInputFilename(const InputInfo &Input) const override;
+
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
Modified: cfe/trunk/test/Driver/openmp-offload-gpu.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/openmp-offload-gpu.c?rev=318763&r1=318762&r2=318763&view=diff
==============================================================================
--- cfe/trunk/test/Driver/openmp-offload-gpu.c (original)
+++ cfe/trunk/test/Driver/openmp-offload-gpu.c Tue Nov 21 06:44:45 2017
@@ -10,7 +10,8 @@
/// ###########################################################################
/// Check -Xopenmp-target uses one of the archs provided when several archs are used.
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN: -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s
// CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60"
@@ -19,7 +20,9 @@
/// ###########################################################################
/// Check -Xopenmp-target -march=sm_35 works as expected when two triples are present.
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp \
+// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \
+// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s
// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35"
@@ -28,43 +31,67 @@
/// ###########################################################################
/// Check cubin file generation and usage by nvlink
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-CUBIN %s
+// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
+/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
+
+// CHK-CUBIN-NVLINK: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
+// CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
+
+/// ###########################################################################
-// CHK-CUBIN: clang{{.*}}" "-o" "{{.*}}.s"
-// CHK-CUBIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s"
-// CHK-CUBIN-NEXT: nvlink" {{.*}}.cubin"
+/// Check unbundlink of assembly file, cubin file generation and usage by nvlink
+// RUN: touch %t.s
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %t.s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK %s
+/// Use DAG to ensure that assembly file has been unbundled.
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX:.*\.s]]"
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=s" {{.*}}"-outputs={{.*}}[[PTX]]
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG-SAME: "-unbundle"
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
-// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-CUBIN-DARWIN %s
+/// Check cubin file generation and bundling
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s -c 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-PTXAS-CUBIN-BUNDLING %s
-// CHK-CUBIN-DARWIN: clang{{.*}}" "-o" "{{.*}}.s"
-// CHK-CUBIN-DARWIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s"
-// CHK-CUBIN-DARWIN-NEXT: nvlink" {{.*}}.cubin"
+// CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-PTXAS-CUBIN-BUNDLING-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
+// CHK-PTXAS-CUBIN-BUNDLING: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-inputs={{.*}}[[CUBIN]]
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink
-// RUN: touch %t1.o
-// RUN: touch %t2.o
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
+/// Check cubin file unbundling and usage by nvlink
+// RUN: touch %t.o
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %t.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s
-// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
+/// Use DAG to ensure that cubin file has been unbundled.
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: nvlink{{.*}}" {{.*}}"[[CUBIN:.*\.cubin]]"
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-outputs={{.*}}[[CUBIN]]
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG-SAME: "-unbundle"
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+/// Check cubin file generation and usage by nvlink
// RUN: touch %t1.o
// RUN: touch %t2.o
-// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN-DARWIN %s
+// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
+/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
-// CHK-TWOCUBIN-DARWIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
+// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
/// ###########################################################################
@@ -77,7 +104,8 @@
/// ###########################################################################
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it.
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \
+// RUN: -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s
// CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c"
@@ -86,7 +114,8 @@
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP
/// Check that the flag is passed when -fopenmp-relocatable-target is used.
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \
+// RUN: -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s
// CHK-PTXAS-RELO: ptxas{{.*}}" "-c"
More information about the cfe-commits
mailing list