[clang] 52b9a39 - [OpenMP] Make `-fopenmp-target=` use the `nvptx-arch` tool

Fri Jan 13 14:52:25 PST 2023

Author: Joseph Huber
Date: 2023-01-13T16:52:06-06:00
New Revision: 52b9a3974244183806ba862e161670d79885443a

URL: https://github.com/llvm/llvm-project/commit/52b9a3974244183806ba862e161670d79885443a
DIFF: https://github.com/llvm/llvm-project/commit/52b9a3974244183806ba862e161670d79885443a.diff

LOG: [OpenMP] Make `-fopenmp-target=` use the `nvptx-arch` tool

Previously, if the user did not provide an architecture when using
`-fopenmp-targets=nvptx64` we used the value from
`CLANG_OPENMP_DEFAULT_NVPTX_ARCH` which is defined at compile time. This
isn't ideal because it means that the default is set when the LLVM
compiler it built. Instead this patch uses the `nvptx-arch` tool to
query it at runtime. This matches the existing behaviour of the AMDGPU
toolchain with its `amdgpu-arch` tool.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D141708

Added: 
    

Modified: 
    clang/lib/Driver/ToolChains/Cuda.cpp
    clang/test/Driver/openmp-system-arch.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 52dd0c2166683..f2e575cd10592 100644

--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -19,6 +19,8 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FormatAdapters.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
@@ -753,10 +755,22 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
       if (!llvm::is_contained(*DAL, A))
         DAL->append(A);
 
-    if (!DAL->hasArg(options::OPT_march_EQ))
-      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
-                        !BoundArch.empty() ? BoundArch
-                                           : CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
+    if (!DAL->hasArg(options::OPT_march_EQ)) {
+      StringRef Arch = BoundArch;
+      if (Arch.empty()) {
+        auto ArchsOrErr = getSystemGPUArchs(Args);
+        if (!ArchsOrErr) {
+          std::string ErrMsg =
+              llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
+          getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
+              << llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
+          Arch = CudaArchToString(CudaArch::CudaDefault);
+        } else {
+          Arch = Args.MakeArgString(ArchsOrErr->front());
+        }
+      }
+      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
+    }
 
     return DAL;
   }

diff  --git a/clang/test/Driver/openmp-system-arch.c b/clang/test/Driver/openmp-system-arch.c
index b99522269902c..2aefa96003f23 100644
--- a/clang/test/Driver/openmp-system-arch.c
+++ b/clang/test/Driver/openmp-system-arch.c
@@ -37,12 +37,18 @@
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \
 // RUN:     --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=ARCH-GFX906
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:     --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=ARCH-GFX906
 // ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
 
 // case when nvptx-arch succeeds.
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \
 // RUN:     --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=ARCH-SM_70
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:     --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=ARCH-SM_70
 // ARCH-SM_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
 
 // case when both nvptx-arch and amdgpu-arch succeed.
@@ -60,3 +66,15 @@
 // ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
 // ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
 // ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_75"
+
+// case when 'nvptx-arch' returns nothing using `-fopenmp-targets=`.
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp \
+// RUN:     -fopenmp-targets=nvptx64-nvidia-cuda --nvptx-arch-tool=%t/nvptx_arch_empty %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=NVPTX
+// NVPTX: error: cannot determine nvptx64 architecture: No NVIDIA GPU detected in the system; consider passing it via '-march'
+
+// case when 'amdgpu-arch' returns nothing using `-fopenmp-targets=`.
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp \
+// RUN:     -fopenmp-targets=amdgcn-amd-amdhsa --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=AMDGPU
+// AMDGPU: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '-march'