[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default
Joseph Huber via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 6 14:36:31 PDT 2020
jhuber6 created this revision.
jhuber6 added a reviewer: jdoerfert.
jhuber6 added projects: clang, OpenMP.
Herald added subscribers: openmp-commits, cfe-commits, guansong, yaxunl, mgorny.
jhuber6 requested review of this revision.
Herald added a subscriber: sstefan1.
This patch changes the CMake files for Clang and Libomptarget to query the system for its supported CUDA architecture. This simplifies building LLVM with OpenMP offloading support by removing the need to manually specify the optimal architecture for each system. Libomptarget will also build support for sm_35 as a fallback. Detection uses CMake's FindCUDA module (find_package(CUDA) and cuda_select_nvcc_arch_flags), which is deprecated in CMake 3.18.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D88929
Files:
clang/CMakeLists.txt
openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
Index: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -68,13 +68,26 @@
set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
# Get the compute capability the user requested or use SM_35 by default.
- # SM_35 is what clang uses by default.
- set(default_capabilities 35)
+ set(compute_capabilities 35)
+ if (NOT DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES)
+ find_package(CUDA QUIET)
+ if (CUDA_FOUND)
+ cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+ string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH ${CUDA_ARCH_FLAGS})
+ if (NOT DEFINED CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
+ message(WARNING "Setting default architecture for OpenMP target library to sm_35")
+ else()
+ list(APPEND compute_capabilities ${CMAKE_MATCH_1})
+ endif()
+ endif()
+ endif()
+
+
if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
endif()
- set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${default_capabilities} CACHE STRING
+ set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${compute_capabilities} CACHE STRING
"List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.")
string(REPLACE "," ";" nvptx_sm_list ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES})
Index: clang/CMakeLists.txt
===================================================================
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -305,13 +305,26 @@
# OpenMP offloading requires at least sm_35 because we use shuffle instructions
# to generate efficient code for reductions and the atomicMax instruction on
# 64-bit integers in the implementation of conditional lastprivate.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
- "Default architecture for OpenMP offloading to Nvidia GPUs.")
-string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
- message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+set(CUDA_ARCH_FLAGS "sm_35")
+
+# Try to find the highest architecture the host supports
+if (NOT DEFINED CLANG_OPENMP_NVPTX_DEFAULT_ARCH)
+ find_package(CUDA QUIET)
+ if (CUDA_FOUND)
+ cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+ endif()
+else()
+ set(CUDA_ARCH_FLAGS ${CLANG_OPENMP_NVPTX_DEFAULT_ARCH})
+endif()
+
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH ${CUDA_ARCH_FLAGS})
+if (NOT DEFINED CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
"Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+ message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+else()
+ set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH ${CUDA_ARCH} CACHE STRING
+ "Default architecture for OpenMP offloading to Nvidia GPUs.")
endif()
set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch")
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D88929.296548.patch
Type: text/x-patch
Size: 3308 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20201006/1f2a0a30/attachment.bin>
More information about the cfe-commits
mailing list