[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

Tue Oct 6 18:59:23 PDT 2020

jhuber6 updated this revision to Diff 296577.
jhuber6 added a comment.

Removing redundant call to `find_package`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

Files:
  clang/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt


Index: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===================================================================

--- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -67,9 +67,22 @@
 
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
 
-  # Get the compute capability the user requested or use SM_35 by default.
-  # SM_35 is what clang uses by default.
+  # Always build with compute capability sm_35 as a fallback and the highest
+  # architecture the system supports by default.
   set(default_capabilities 35)
+  if (NOT DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES)
+    cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+    # Quote the flag list so an empty result is still passed as an argument.
+    # string(REGEX MATCH) always sets its output variable (to the empty string
+    # when nothing matches), so test for emptiness rather than DEFINED.
+    string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH "${CUDA_ARCH_FLAGS}")
+    if (NOT CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
+      libomptarget_warning_say("Setting Nvidia GPU architecture support for OpenMP target runtime library to sm_35 by default")
+    else()
+      list(APPEND default_capabilities ${CMAKE_MATCH_1})
+    endif()
+  endif()
+
   if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
     set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
     libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
Index: clang/CMakeLists.txt
===================================================================
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -305,13 +305,29 @@
 # OpenMP offloading requires at least sm_35 because we use shuffle instructions
 # to generate efficient code for reductions and the atomicMax instruction on
 # 64-bit integers in the implementation of conditional lastprivate.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
-  "Default architecture for OpenMP offloading to Nvidia GPUs.")
-string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
-  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+set(CUDA_ARCH_FLAGS "sm_35")
+
+# Try to find the highest architecture the host supports
+if (NOT DEFINED CLANG_OPENMP_NVPTX_DEFAULT_ARCH)
+  find_package(CUDA QUIET)
+  if (CUDA_FOUND)
+    cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  endif()
+else()
+  set(CUDA_ARCH_FLAGS "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
+endif()
+
+# string(REGEX MATCH) always sets CUDA_ARCH (empty when nothing matches), so
+# check for an empty result instead of DEFINED; quoting keeps an empty flag
+# list from dropping the input argument.
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH "${CUDA_ARCH_FLAGS}")
+if (NOT CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
   set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
     "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+else()
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CUDA_ARCH}" CACHE STRING
+    "Default architecture for OpenMP offloading to Nvidia GPUs.")
 endif()
 
 set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch")


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D88929.296577.patch
Type: text/x-patch
Size: 3010 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20201007/724fcb19/attachment.bin>