[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default
Joseph Huber via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 6 18:59:23 PDT 2020
jhuber6 updated this revision to Diff 296577.
jhuber6 added a comment.
Removing redundant call to `find_package`.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D88929/new/
https://reviews.llvm.org/D88929
Files:
clang/CMakeLists.txt
openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
Index: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -67,9 +67,19 @@
set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
- # Get the compute capability the user requested or use SM_35 by default.
- # SM_35 is what clang uses by default.
+ # Always build with compute capability sm_35 as a fallback and the highest
+ # architecture the system supports by default
set(default_capabilities 35)
+ if (NOT DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES)
+ cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+ string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH ${CUDA_ARCH_FLAGS})
+ if (NOT DEFINED CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
+ libomptarget_warning_say("Setting Nvidia GPU architecture support for OpenMP target runtime library to sm_35 by default")
+ else()
+ list(APPEND default_capabilities ${CMAKE_MATCH_1})
+ endif()
+ endif()
+
if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
Index: clang/CMakeLists.txt
===================================================================
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -305,13 +305,26 @@
# OpenMP offloading requires at least sm_35 because we use shuffle instructions
# to generate efficient code for reductions and the atomicMax instruction on
# 64-bit integers in the implementation of conditional lastprivate.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
- "Default architecture for OpenMP offloading to Nvidia GPUs.")
-string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
- message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+set(CUDA_ARCH_FLAGS "sm_35")
+
+# Try to find the highest architecture the host supports
+if (NOT DEFINED CLANG_OPENMP_NVPTX_DEFAULT_ARCH)
+ find_package(CUDA QUIET)
+ if (CUDA_FOUND)
+ cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+ endif()
+else()
+ set(CUDA_ARCH_FLAGS ${CLANG_OPENMP_NVPTX_DEFAULT_ARCH})
+endif()
+
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH ${CUDA_ARCH_FLAGS})
+if (NOT DEFINED CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
"Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+ message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+else()
+ set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH ${CUDA_ARCH} CACHE STRING
+ "Default architecture for OpenMP offloading to Nvidia GPUs.")
endif()
set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch")
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D88929.296577.patch
Type: text/x-patch
Size: 3010 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20201007/724fcb19/attachment.bin>
More information about the cfe-commits
mailing list