[Openmp-commits] [PATCH] D88185: [OpenMP] cmake option LIBOMPTARGET_NVPTX_MAX_SM for nvptx device RTL
Ye Luo via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Wed Sep 23 15:14:13 PDT 2020
ye-luo created this revision.
Herald added subscribers: guansong, yaxunl, mgorny.
ye-luo requested review of this revision.
Herald added a reviewer: jdoerfert.
Herald added a subscriber: sstefan1.
This patch allows customizing MAX_SM for non-flagship GPUs, which reduces graphics memory usage.
In addition, the size has so far been hard-coded only up to __CUDA_ARCH__ 700, which is already a hassle for 800.
Introduce a MAX_SM value for 800 and guard against future architectures.
https://reviews.llvm.org/D88185
Files:
openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -47,16 +47,27 @@
// Maximum number of omp state objects per SM allocated statically in global
// memory.
-#if __CUDA_ARCH__ >= 700
+#if __CUDA_ARCH__ >= 600
#define OMP_STATE_COUNT 32
+#else
+#define OMP_STATE_COUNT 16
+#endif
+
+#if !defined(MAX_SM)
+#if __CUDA_ARCH__ >= 900
+#error unsupported compute capability, define MAX_SM via LIBOMPTARGET_NVPTX_MAX_SM cmake option
+#elif __CUDA_ARCH__ >= 800
+// GA100 design has a maximum of 128 SMs but A100 product only has 108 SMs
+// GA102 design has a maximum of 84 SMs
+#define MAX_SM 108
+#elif __CUDA_ARCH__ >= 700
#define MAX_SM 84
#elif __CUDA_ARCH__ >= 600
-#define OMP_STATE_COUNT 32
#define MAX_SM 56
#else
-#define OMP_STATE_COUNT 16
#define MAX_SM 16
#endif
+#endif
#define OMP_ACTIVE_PARALLEL_LEVEL 128
Index: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -82,6 +82,11 @@
set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm})
endforeach()
+ # Override default MAX_SM in src/target_impl.h if requested
+ if (DEFINED LIBOMPTARGET_NVPTX_MAX_SM)
+ set(MAX_SM_DEFINITION "-DMAX_SM=${LIBOMPTARGET_NVPTX_MAX_SM}")
+ endif()
+
# Activate RTL message dumps if requested by the user.
set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
"Activate NVPTX device RTL debug messages.")
@@ -96,7 +101,7 @@
list(APPEND CUDA_NVCC_FLAGS -I${devicertl_base_directory}
-I${devicertl_nvptx_directory}/src)
cuda_add_library(omptarget-nvptx STATIC ${cuda_src_files} ${omp_data_objects}
- OPTIONS ${CUDA_ARCH} ${CUDA_DEBUG})
+ OPTIONS ${CUDA_ARCH} ${CUDA_DEBUG} ${MAX_SM_DEFINITION})
# Install device RTL under the lib destination folder.
install(TARGETS omptarget-nvptx ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")
@@ -159,7 +164,7 @@
get_filename_component(outfile ${src} NAME)
add_custom_command(OUTPUT ${outfile}-sm_${sm}.bc
- COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${bc_flags} ${cuda_arch}
+ COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${bc_flags} ${cuda_arch} ${MAX_SM_DEFINITION}
-c ${infile} -o ${outfile}-sm_${sm}.bc
DEPENDS ${infile}
IMPLICIT_DEPENDS CXX ${infile}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D88185.293869.patch
Type: text/x-patch
Size: 2657 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20200923/fba089f4/attachment.bin>
More information about the Openmp-commits
mailing list