[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

Wed Oct 7 14:27:35 PDT 2020

jhuber6 updated this revision to Diff 296792.
jhuber6 added a comment.

Implementing Ye's code. I changed it to putput the architecture as a dependency and then set the value where we define the default architecture. I did a full build from scratch using this and got the sm_70 libraries for my machine without needing to specify it in my build script.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

Files:
  clang/CMakeLists.txt
  openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt


Index: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===================================================================

--- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -67,9 +67,14 @@
 
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
 
-  # Get the compute capability the user requested or use SM_35 by default.
-  # SM_35 is what clang uses by default.
-  set(default_capabilities 35)
+  # Build library support for the highest compute capability the system supports
+  # and always build support for sm_35 by default
+  if (${LIBOMPTARGET_DEP_CUDA_ARCH} EQUAL 35)
+    set(default_capabilities 35)
+  else()
+      set(default_capabilities "35,${LIBOMPTARGET_DEP_CUDA_ARCH}")
+  endif()
+
   if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
     set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
     libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
Index: openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
===================================================================
--- openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
+++ openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
@@ -117,6 +117,18 @@
 endif()
 find_package(CUDA QUIET)
 
+# Try to get the highest Nvidia GPU architecture the system supports
+if (CUDA_FOUND)
+  cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH_MATCH_OUTPUT ${CUDA_ARCH_FLAGS})
+  if (NOT DEFINED CUDA_ARCH_MATCH_OUTPUT OR "${CMAKE_MATCH_1}" LESS 35)
+    libomptarget_warning_say("Setting Nvidia GPU architecture support for OpenMP target runtime library to sm_35 by default")
+    set(LIBOMPTARGET_DEP_CUDA_ARCH "35")
+  else()
+    set(LIBOMPTARGET_DEP_CUDA_ARCH "${CMAKE_MATCH_1}")
+  endif()
+endif()
+
 set(LIBOMPTARGET_DEP_CUDA_FOUND ${CUDA_FOUND})
 set(LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
 
Index: clang/CMakeLists.txt
===================================================================
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -305,13 +305,26 @@
 # OpenMP offloading requires at least sm_35 because we use shuffle instructions
 # to generate efficient code for reductions and the atomicMax instruction on
 # 64-bit integers in the implementation of conditional lastprivate.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
-  "Default architecture for OpenMP offloading to Nvidia GPUs.")
-string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
-  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+set(CUDA_ARCH_FLAGS "sm_35")
+
+# Try to find the highest Nvidia GPU architecture the system supports
+if (NOT DEFINED CLANG_OPENMP_NVPTX_DEFAULT_ARCH)
+  find_package(CUDA QUIET)
+  if (CUDA_FOUND)
+    cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  endif()
+else()
+  set(CUDA_ARCH_FLAGS ${CLANG_OPENMP_NVPTX_DEFAULT_ARCH})
+endif()
+
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH_MATCH ${CUDA_ARCH_FLAGS})
+if (NOT DEFINED CUDA_ARCH_MATCH OR "${CMAKE_MATCH_1}" LESS 35)
   set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
     "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+else()
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH ${CUDA_ARCH_MATCH} CACHE STRING
+    "Default architecture for OpenMP offloading to Nvidia GPUs.")
 endif()
 
 set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch")


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D88929.296792.patch
Type: text/x-patch
Size: 3755 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20201007/6726ea4b/attachment-0001.bin>