[PATCH] D124292: [OpenMP] Use CUDA's non-RDC mode when LTO has whole program visibility
Joseph Huber via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Sat Apr 23 09:43:03 PDT 2022
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG3530c35c6609: [OpenMP] Use CUDA's non-RDC mode when LTO has whole program visibility (authored by jhuber6).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D124292/new/
https://reviews.llvm.org/D124292
Files:
clang/test/Driver/linker-wrapper.c
clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===================================================================
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -595,7 +595,7 @@
// TODO: Move these to a separate file.
namespace nvptx {
Expected<std::string> assemble(StringRef InputFile, Triple TheTriple,
- StringRef Arch) {
+ StringRef Arch, bool RDC = true) {
// NVPTX uses the ptxas binary to create device object files.
Expected<std::string> PtxasPath = findProgram("ptxas", {CudaBinaryPath});
if (!PtxasPath)
@@ -626,7 +626,8 @@
CmdArgs.push_back(Opt);
CmdArgs.push_back("--gpu-name");
CmdArgs.push_back(Arch);
- CmdArgs.push_back("-c");
+ if (RDC)
+ CmdArgs.push_back("-c");
CmdArgs.push_back(InputFile);
@@ -933,7 +934,8 @@
}
Error linkBitcodeFiles(SmallVectorImpl<std::string> &InputFiles,
- const Triple &TheTriple, StringRef Arch) {
+ const Triple &TheTriple, StringRef Arch,
+ bool &WholeProgram) {
SmallVector<std::unique_ptr<MemoryBuffer>, 4> SavedBuffers;
SmallVector<std::unique_ptr<lto::InputFile>, 4> BitcodeFiles;
SmallVector<std::string, 4> NewInputFiles;
@@ -1009,7 +1011,7 @@
};
// We assume visibility of the whole program if every input file was bitcode.
- bool WholeProgram = BitcodeFiles.size() == InputFiles.size();
+ WholeProgram = BitcodeFiles.size() == InputFiles.size();
auto LTOBackend =
(EmbedBitcode) ? createLTO(TheTriple, Arch, WholeProgram, OutputBitcode)
: createLTO(TheTriple, Arch, WholeProgram);
@@ -1089,7 +1091,7 @@
// If we are compiling for NVPTX we need to run the assembler first.
if (TheTriple.isNVPTX() && !EmbedBitcode) {
for (auto &File : Files) {
- auto FileOrErr = nvptx::assemble(File, TheTriple, Arch);
+ auto FileOrErr = nvptx::assemble(File, TheTriple, Arch, !WholeProgram);
if (!FileOrErr)
return FileOrErr.takeError();
File = *FileOrErr;
@@ -1117,10 +1119,11 @@
for (auto &LinkerInput : LinkerInputMap) {
DeviceFile &File = LinkerInput.getFirst();
Triple TheTriple = Triple(File.TheTriple);
+ bool WholeProgram = false;
// Run LTO on any bitcode files and replace the input with the result.
- if (Error Err =
- linkBitcodeFiles(LinkerInput.getSecond(), TheTriple, File.Arch))
+ if (Error Err = linkBitcodeFiles(LinkerInput.getSecond(), TheTriple,
+ File.Arch, WholeProgram))
return Err;
// If we are embedding bitcode for JIT, skip the final device linking.
@@ -1130,6 +1133,14 @@
continue;
}
+ // If we performed LTO on NVPTX and had whole program visibility, we can use
+ // CUDA in non-RDC mode.
+ if (WholeProgram && TheTriple.isNVPTX()) {
+ assert(!LinkerInput.getSecond().empty() && "No non-RDC image to embed");
+ LinkedImages.push_back(LinkerInput.getSecond().front());
+ continue;
+ }
+
auto ImageOrErr = linkDevice(LinkerInput.getSecond(), TheTriple, File.Arch);
if (!ImageOrErr)
return ImageOrErr.takeError();
Index: clang/test/Driver/linker-wrapper.c
===================================================================
--- clang/test/Driver/linker-wrapper.c
+++ clang/test/Driver/linker-wrapper.c
@@ -38,5 +38,5 @@
// RUN: clang-linker-wrapper --host-triple x86_64-unknown-linux-gnu --dry-run -linker-path \
// RUN: /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=LTO
-// LTO: ptxas{{.*}}-m64 -o {{.*}}.cubin -O2 --gpu-name sm_70 -c {{.*}}.s
-// LTO: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.cubin
+// LTO: ptxas{{.*}}-m64 -o {{.*}}.cubin -O2 --gpu-name sm_70 {{.*}}.s
+// LTO-NOT: nvlink
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D124292.424730.patch
Type: text/x-patch
Size: 3890 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20220423/0f476cd3/attachment.bin>
More information about the cfe-commits
mailing list