[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

Fri Oct 13 08:07:27 PDT 2017

Hahnfeld created this revision.
Herald added a subscriber: mgorny.

The shuffle instructions used in reductions require at least sm_30,
but the user may want to customize the default architecture.
Also remove some code that was added while troubleshooting broken
tests on external build bots.


https://reviews.llvm.org/D38883

Files:
  CMakeLists.txt
  include/clang/Config/config.h.cmake
  lib/Driver/ToolChains/Cuda.cpp
  lib/Driver/ToolChains/Cuda.h


Index: lib/Driver/ToolChains/Cuda.h
===================================================================

--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -76,17 +76,6 @@
   std::string getLibDeviceFile(StringRef Gpu) const {
     return LibDeviceMap.lookup(Gpu);
   }
-  /// \brief Get lowest available compute capability
-  /// for which a libdevice library exists.
-  std::string getLowestExistingArch() const {
-    std::string LibDeviceFile;
-    for (auto key : LibDeviceMap.keys()) {
-      LibDeviceFile = LibDeviceMap.lookup(key);
-      if (!LibDeviceFile.empty())
-        return key;
-    }
-    return "sm_20";
-  }
 };
 
 namespace tools {
Index: lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -167,19 +167,6 @@
       }
     }
 
-    // This code prevents IsValid from being set when
-    // no libdevice has been found.
-    bool allEmpty = true;
-    std::string LibDeviceFile;
-    for (auto key : LibDeviceMap.keys()) {
-      LibDeviceFile = LibDeviceMap.lookup(key);
-      if (!LibDeviceFile.empty())
-        allEmpty = false;
-    }
-
-    if (allEmpty)
-      continue;
-
     IsValid = true;
     break;
   }
@@ -565,12 +552,8 @@
 
     StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
     if (Arch.empty()) {
-      // Default compute capability for CUDA toolchain is the
-      // lowest compute capability supported by the installed
-      // CUDA version.
-      DAL->AddJoinedArg(nullptr,
-          Opts.getOption(options::OPT_march_EQ),
-          CudaInstallation.getLowestExistingArch());
+      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+                        CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
     }
 
     return DAL;
Index: include/clang/Config/config.h.cmake
===================================================================
--- include/clang/Config/config.h.cmake
+++ include/clang/Config/config.h.cmake
@@ -20,6 +20,9 @@
 /* Default OpenMP runtime used by -fopenmp. */
 #define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}"
 
+/* Default architecture for OpenMP offloading to Nvidia GPUs. */
+#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}"
+
 /* Multilib suffix for libdir. */
 #define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}"
 
Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -235,6 +235,16 @@
 set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
   "Default OpenMP runtime used by -fopenmp.")
 
+# OpenMP offloading requires at least sm_30 because we use shuffle instructions
+# to generate efficient code for reductions.
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+  "Default architecture for OpenMP offloading to Nvidia GPUs.")
+if (NOT("${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}" MATCHES "^sm_[0-9]+$"))
+  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30")
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+    "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+endif()
+
 set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING
   "Vendor-specific text for showing with version information.")
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D38883.118912.patch
Type: text/x-patch
Size: 3340 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20171013/e2da685b/attachment.bin>