[clang] a17ab7a - [OpenMP] Add support for '--offload-arch=native' to OpenMP offloading
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 11 08:30:50 PST 2023
Author: Joseph Huber
Date: 2023-01-11T10:30:38-06:00
New Revision: a17ab7aa3be0b2b2adf992e3754a58f17802491f
URL: https://github.com/llvm/llvm-project/commit/a17ab7aa3be0b2b2adf992e3754a58f17802491f
DIFF: https://github.com/llvm/llvm-project/commit/a17ab7aa3be0b2b2adf992e3754a58f17802491f.diff
LOG: [OpenMP] Add support for '--offload-arch=native' to OpenMP offloading
This patch adds support for '--offload-arch=native' to OpenMP
offloading. This will automatically generate the toolchains required to
support whatever GPUs the user has installed. Getting this to work
requires a bit of a hack. The problem is that we need the ToolChain to
launch its architecture-detection program, but we do not yet have that
ToolChain built. I had to temporarily create the ToolChain and also add
some logic to suppress the regular warnings & errors during that query.
Depends on D141078
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D141105
Added:
clang/test/Driver/openmp-system-arch.c
Modified:
clang/include/clang/Driver/Driver.h
clang/lib/Driver/Driver.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index 73cd0362c3b8..4bbb113b6cf5 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -481,10 +481,11 @@ class Driver {
/// Returns the set of bound architectures active for this offload kind.
/// If there are no bound architctures we return a set containing only the
- /// empty string.
+ /// empty string. The \p SuppressError option is used to suppress errors.
llvm::DenseSet<StringRef>
getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
- Action::OffloadKind Kind, const ToolChain *TC) const;
+ Action::OffloadKind Kind, const ToolChain *TC,
+ bool SuppressError = false) const;
/// Check that the file referenced by Value exists. If it doesn't,
/// issue a diagnostic and return false.
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 3e7616b166d8..1c71e71597df 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -859,9 +859,30 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
HostTC->getTriple());
// Attempt to deduce the offloading triple from the set of architectures.
- // We can only correctly deduce NVPTX / AMDGPU triples currently.
- llvm::DenseSet<StringRef> Archs =
- getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, nullptr);
+ // We can only correctly deduce NVPTX / AMDGPU triples currently. We need
+ // to temporarily create these toolchains so that we can access tools for
+ // inferring architectures.
+ llvm::DenseSet<StringRef> Archs;
+ if (NVPTXTriple) {
+ auto TempTC = std::make_unique<toolchains::CudaToolChain>(
+ *this, *NVPTXTriple, *HostTC, C.getInputArgs());
+ for (StringRef Arch : getOffloadArchs(
+ C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true))
+ Archs.insert(Arch);
+ }
+ if (AMDTriple) {
+ auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
+ *this, *AMDTriple, *HostTC, C.getInputArgs());
+ for (StringRef Arch : getOffloadArchs(
+ C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true))
+ Archs.insert(Arch);
+ }
+ if (!AMDTriple && !NVPTXTriple) {
+ for (StringRef Arch :
+ getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, nullptr, true))
+ Archs.insert(Arch);
+ }
+
for (StringRef Arch : Archs) {
if (NVPTXTriple && IsNVIDIAGpuArch(StringToCudaArch(
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
@@ -876,6 +897,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
}
}
+ // If the set is empty then we failed to find a native architecture.
+ if (Archs.empty()) {
+ Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch)
+ << "native";
+ return;
+ }
+
for (const auto &TripleAndArchs : DerivedArchs)
OpenMPTriples.push_back(TripleAndArchs.first());
}
@@ -4193,16 +4221,17 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
static StringRef getCanonicalArchString(Compilation &C,
const llvm::opt::DerivedArgList &Args,
StringRef ArchStr,
- const llvm::Triple &Triple) {
+ const llvm::Triple &Triple,
+ bool SuppressError = false) {
// Lookup the CUDA / HIP architecture string. Only report an error if we were
// expecting the triple to be only NVPTX / AMDGPU.
CudaArch Arch = StringToCudaArch(getProcessorFromTargetID(Triple, ArchStr));
- if (Triple.isNVPTX() &&
+ if (!SuppressError && Triple.isNVPTX() &&
(Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch))) {
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
<< "CUDA" << ArchStr;
return StringRef();
- } else if (Triple.isAMDGPU() &&
+ } else if (!SuppressError && Triple.isAMDGPU() &&
(Arch == CudaArch::UNKNOWN || !IsAMDGpuArch(Arch))) {
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
<< "HIP" << ArchStr;
@@ -4245,7 +4274,8 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
llvm::DenseSet<StringRef>
Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
- Action::OffloadKind Kind, const ToolChain *TC) const {
+ Action::OffloadKind Kind, const ToolChain *TC,
+ bool SuppressError) const {
if (!TC)
TC = &C.getDefaultToolChain();
@@ -4279,21 +4309,26 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
// invalid architecture is given we simply exit.
if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) {
for (StringRef Arch : llvm::split(Arg->getValue(), ",")) {
- if (Arch == "native") {
+ if (Arch == "native" || Arch.empty()) {
auto GPUsOrErr = TC->getSystemGPUArchs(Args);
if (!GPUsOrErr) {
- TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
- << llvm::Triple::getArchTypeName(TC->getArch())
- << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch";
+ if (SuppressError)
+ llvm::consumeError(GPUsOrErr.takeError());
+ else
+ TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
+ << llvm::Triple::getArchTypeName(TC->getArch())
+ << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch";
continue;
}
- for (auto ArchStr : *GPUsOrErr)
+ for (auto ArchStr : *GPUsOrErr) {
Archs.insert(
- getCanonicalArchString(C, Args, ArchStr, TC->getTriple()));
+ getCanonicalArchString(C, Args, Args.MakeArgString(ArchStr),
+ TC->getTriple(), SuppressError));
+ }
} else {
- StringRef ArchStr =
- getCanonicalArchString(C, Args, Arch, TC->getTriple());
+ StringRef ArchStr = getCanonicalArchString(
+ C, Args, Arch, TC->getTriple(), SuppressError);
if (ArchStr.empty())
return Archs;
Archs.insert(ArchStr);
@@ -4304,8 +4339,8 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
if (Arch == "all") {
Archs.clear();
} else {
- StringRef ArchStr =
- getCanonicalArchString(C, Args, Arch, TC->getTriple());
+ StringRef ArchStr = getCanonicalArchString(
+ C, Args, Arch, TC->getTriple(), SuppressError);
if (ArchStr.empty())
return Archs;
Archs.erase(ArchStr);
@@ -4320,6 +4355,10 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
C.setContainsError();
}
+ // Skip filling defaults if we're just querying what is available.
+ if (SuppressError)
+ return Archs;
+
if (Archs.empty()) {
if (Kind == Action::OFK_Cuda)
Archs.insert(CudaArchToString(CudaArch::CudaDefault));
diff --git a/clang/test/Driver/openmp-system-arch.c b/clang/test/Driver/openmp-system-arch.c
new file mode 100644
index 000000000000..13ce8146f718
--- /dev/null
+++ b/clang/test/Driver/openmp-system-arch.c
@@ -0,0 +1,56 @@
+// RUN: mkdir -p %t
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_fail %t/
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
+// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/
+// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/
+// RUN: echo '#!/bin/sh' > %t/amdgpu_arch_empty
+// RUN: chmod +x %t/amdgpu_arch_fail
+// RUN: chmod +x %t/amdgpu_arch_gfx906
+// RUN: chmod +x %t/amdgpu_arch_empty
+// RUN: echo '#!/bin/sh' > %t/nvptx_arch_empty
+// RUN: chmod +x %t/nvptx_arch_fail
+// RUN: chmod +x %t/nvptx_arch_sm_70
+// RUN: chmod +x %t/nvptx_arch_empty
+
+// case when nvptx-arch and amdgpu-arch return nothing or fail
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \
+// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \
+// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch= \
+// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch= \
+// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
+// NO-OUTPUT-ERROR: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead.
+
+// case when amdgpu-arch succeeds.
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \
+// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=ARCH-GFX906
+// ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
+
+// case when nvptx-arch succeeds.
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \
+// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=ARCH-SM_70
+// ARCH-SM_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
+
+// case when both nvptx-arch and amdgpu-arch succeed.
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \
+// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=ARCH-SM_70-GFX906
+// ARCH-SM_70-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
+// ARCH-SM_70-GFX906: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
+
+// case when both nvptx-arch and amdgpu-arch succeed with other archs.
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native,sm_75,gfx1030 \
+// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=ARCH-MULTIPLE
+// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx1030"
+// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
+// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
+// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_75"
More information about the cfe-commits
mailing list