[Openmp-commits] [openmp] d564409 - [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

Thu Oct 8 09:10:12 PDT 2020

Author: Joseph Huber
Date: 2020-10-08T12:09:34-04:00
New Revision: d564409946a5a13cb6391fc0fec54dcbd6f6d249

URL: https://github.com/llvm/llvm-project/commit/d564409946a5a13cb6391fc0fec54dcbd6f6d249
DIFF: https://github.com/llvm/llvm-project/commit/d564409946a5a13cb6391fc0fec54dcbd6f6d249.diff

LOG: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

Summary:
This patch changes the CMake files for Clang and Libomptarget to query the
system for its supported CUDA architecture. This makes it much easier for the
user to build optimal code without needing to set the flags manually. This
relies on the now deprecated FindCUDA method in CMake, but full support for
architecture detection is only availible in CMake >3.18

Reviewers: jdoerfert ye-luo

Subscribers: cfe-commits guansong mgorny openmp-commits sstefan1 yaxunl

Tags: #clang #OpenMP

Differential Revision: https://reviews.llvm.org/D87946

Added: 
    

Modified: 
    clang/CMakeLists.txt
    openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
    openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index c308328c3ee4..900ef0a4d737 100644

--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -305,13 +305,26 @@ set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
 # OpenMP offloading requires at least sm_35 because we use shuffle instructions
 # to generate efficient code for reductions and the atomicMax instruction on
 # 64-bit integers in the implementation of conditional lastprivate.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
-  "Default architecture for OpenMP offloading to Nvidia GPUs.")
-string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
-  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+set(CUDA_ARCH_FLAGS "sm_35")
+
+# Try to find the highest Nvidia GPU architecture the system supports
+if (NOT DEFINED CLANG_OPENMP_NVPTX_DEFAULT_ARCH)
+  find_package(CUDA QUIET)
+  if (CUDA_FOUND)
+    cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  endif()
+else()
+  set(CUDA_ARCH_FLAGS ${CLANG_OPENMP_NVPTX_DEFAULT_ARCH})
+endif()
+
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH_MATCH ${CUDA_ARCH_FLAGS})
+if (NOT DEFINED CUDA_ARCH_MATCH OR "${CMAKE_MATCH_1}" LESS 35)
   set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
     "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+else()
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH ${CUDA_ARCH_MATCH} CACHE STRING
+    "Default architecture for OpenMP offloading to Nvidia GPUs.")
 endif()
 
 set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch")

diff  --git a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
index 05742bd4fbf7..f6bfadcaf0b3 100644
--- a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
+++ b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
@@ -117,6 +117,18 @@ if (CUDA_TOOLKIT_ROOT_DIR)
 endif()
 find_package(CUDA QUIET)
 
+# Try to get the highest Nvidia GPU architecture the system supports
+if (CUDA_FOUND)
+  cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH_MATCH_OUTPUT ${CUDA_ARCH_FLAGS})
+  if (NOT DEFINED CUDA_ARCH_MATCH_OUTPUT OR "${CMAKE_MATCH_1}" LESS 35)
+    libomptarget_warning_say("Setting Nvidia GPU architecture support for OpenMP target runtime library to sm_35 by default")
+    set(LIBOMPTARGET_DEP_CUDA_ARCH "35")
+  else()
+    set(LIBOMPTARGET_DEP_CUDA_ARCH "${CMAKE_MATCH_1}")
+  endif()
+endif()
+
 set(LIBOMPTARGET_DEP_CUDA_FOUND ${CUDA_FOUND})
 set(LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
 

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index ca9e75953ff4..425c674fb11e 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -67,9 +67,14 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
 
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
 
-  # Get the compute capability the user requested or use SM_35 by default.
-  # SM_35 is what clang uses by default.
-  set(default_capabilities 35)
+  # Build library support for the highest compute capability the system supports
+  # and always build support for sm_35 by default
+  if (${LIBOMPTARGET_DEP_CUDA_ARCH} EQUAL 35)
+    set(default_capabilities 35)
+  else()
+      set(default_capabilities "35,${LIBOMPTARGET_DEP_CUDA_ARCH}")
+  endif()
+
   if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
     set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
     libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")