[clang] d9c64d3 - [OpenMP] Allow CUDA to be linked with OpenMP using the new driver
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 29 08:38:48 PDT 2022
Author: Joseph Huber
Date: 2022-04-29T11:38:40-04:00
New Revision: d9c64d33b98be695fc78a65624242033058ed117
URL: https://github.com/llvm/llvm-project/commit/d9c64d33b98be695fc78a65624242033058ed117
DIFF: https://github.com/llvm/llvm-project/commit/d9c64d33b98be695fc78a65624242033058ed117.diff
LOG: [OpenMP] Allow CUDA to be linked with OpenMP using the new driver
After basic support for embedding and handling CUDA files was added to
the new driver, we should be able to call CUDA functions from OpenMP
code. This patch makes the necessary changes to successfully link in CUDA
programs that were compiled using the new driver. With this patch it
should be possible to compile device-only CUDA code (no kernels) and
call it from OpenMP as follows:
```
$ clang++ cuda.cu -fopenmp-new-driver -offload-arch=sm_70 -c
$ clang++ openmp.cpp cuda.o -fopenmp-new-driver -fopenmp -fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 -march=sm_70
```
Currently this requires using a host variant to suppress the generation
of a CPU-side fallback call.
Depends on D120272
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D120273
Added:
Modified:
clang/test/Driver/linker-wrapper.c
clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Removed:
################################################################################
diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c
index 7920fe8c1a99..a180a792284e 100644
--- a/clang/test/Driver/linker-wrapper.c
+++ b/clang/test/Driver/linker-wrapper.c
@@ -40,3 +40,11 @@
// LTO: ptxas{{.*}}-m64 -o {{.*}}.cubin -O2 --gpu-name sm_70 {{.*}}.s
// LTO-NOT: nvlink
+
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
+// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_70 \
+// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,cuda,nvptx64-nvida-cuda,sm_70
+// RUN: clang-linker-wrapper --host-triple x86_64-unknown-linux-gnu --dry-run -linker-path \
+// RUN: /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA_OMP_LINK
+
+// CUDA_OMP_LINK: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o {{.*}}.o
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 09381d93f17d..c2c1fa738be6 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -160,6 +160,10 @@ static codegen::RegisterCodeGenFlags CodeGenFlags;
/// section will contain one or more offloading binaries stored contiguously.
#define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading"
+/// The magic offset for the first object inside CUDA's fatbinary. This can be
+/// different but it should work for what is passed here.
+static constexpr unsigned FatbinaryOffset = 0x50;
+
/// Information for a device offloading file extracted from the host.
struct DeviceFile {
DeviceFile(StringRef Kind, StringRef TheTriple, StringRef Arch,
@@ -173,7 +177,10 @@ struct DeviceFile {
};
namespace llvm {
-/// Helper that allows DeviceFile to be used as a key in a DenseMap.
+/// Helper that allows DeviceFile to be used as a key in a DenseMap. For now we
+/// assume device files with matching architectures and triples but different
+/// offloading kinds should be handled together, this may not be true in the
+/// future.
template <> struct DenseMapInfo<DeviceFile> {
static DeviceFile getEmptyKey() {
return {DenseMapInfo<StringRef>::getEmptyKey(),
@@ -953,13 +960,37 @@ Error linkBitcodeFiles(SmallVectorImpl<std::string> &InputFiles,
MemoryBuffer::getFileOrSTDIN(File);
if (std::error_code EC = BufferOrErr.getError())
return createFileError(File, EC);
+ MemoryBufferRef Buffer = **BufferOrErr;
file_magic Type = identify_magic((*BufferOrErr)->getBuffer());
- if (Type != file_magic::bitcode) {
+ switch (Type) {
+ case file_magic::bitcode: {
+ Expected<std::unique_ptr<lto::InputFile>> InputFileOrErr =
+ llvm::lto::InputFile::create(Buffer);
+ if (!InputFileOrErr)
+ return InputFileOrErr.takeError();
+
+ // Save the input file and the buffer associated with its memory.
+ BitcodeFiles.push_back(std::move(*InputFileOrErr));
+ SavedBuffers.push_back(std::move(*BufferOrErr));
+ continue;
+ }
+ case file_magic::cuda_fatbinary: {
+ // Cuda fatbinaries made by Clang almost always have an object eighty
+ // bytes from the beginning. This should be sufficient to identify the
+ // symbols.
+ Buffer = MemoryBufferRef(
+ (*BufferOrErr)->getBuffer().drop_front(FatbinaryOffset), "FatBinary");
+ LLVM_FALLTHROUGH;
+ }
+ case file_magic::elf_relocatable:
+ case file_magic::elf_shared_object:
+ case file_magic::macho_object:
+ case file_magic::coff_object: {
Expected<std::unique_ptr<ObjectFile>> ObjFile =
- ObjectFile::createObjectFile(**BufferOrErr, Type);
+ ObjectFile::createObjectFile(Buffer);
if (!ObjFile)
- return ObjFile.takeError();
+ continue;
NewInputFiles.push_back(File.str());
for (auto &Sym : (*ObjFile)->symbols()) {
@@ -973,15 +1004,10 @@ Error linkBitcodeFiles(SmallVectorImpl<std::string> &InputFiles,
else
UsedInSharedLib.insert(Saver.save(*Name));
}
- } else {
- Expected<std::unique_ptr<lto::InputFile>> InputFileOrErr =
- llvm::lto::InputFile::create(**BufferOrErr);
- if (!InputFileOrErr)
- return InputFileOrErr.takeError();
-
- // Save the input file and the buffer associated with its memory.
- BitcodeFiles.push_back(std::move(*InputFileOrErr));
- SavedBuffers.push_back(std::move(*BufferOrErr));
+ continue;
+ }
+ default:
+ continue;
}
}
More information about the cfe-commits
mailing list