[Openmp-commits] [openmp] d23b9fa - [Libomptarget] Update handling of architectures for DeviceRTL
Joseph Huber via Openmp-commits
openmp-commits at lists.llvm.org
Wed Mar 8 09:22:52 PST 2023
Author: Joseph Huber
Date: 2023-03-08T11:22:33-06:00
New Revision: d23b9fa61d190a7a95a83303987b186450d5c58a
URL: https://github.com/llvm/llvm-project/commit/d23b9fa61d190a7a95a83303987b186450d5c58a
DIFF: https://github.com/llvm/llvm-project/commit/d23b9fa61d190a7a95a83303987b186450d5c58a.diff
LOG: [Libomptarget] Update handling of architectures for DeviceRTL
The support for enabling and disabling certain architectures for the
OpenMP device RTL is different between AMD and Nvidia. This patch
updates the logic to make it common. This supports the `auto` format
more generally via the `nvptx-arch` and `amdgpu-arch` options. (These
are not available at CMake time without a runtimes build, or another
install somewhere. But that only prevents users from using auto).
Reviewed By: ye-luo
Differential Revision: https://reviews.llvm.org/D145513
Added:
Modified:
openmp/libomptarget/DeviceRTL/CMakeLists.txt
openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
Removed:
################################################################################
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index 6844e885fa6d0..cf9d21aa357f4 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -56,33 +56,29 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
set(include_directory ${devicertl_base_directory}/include)
set(source_directory ${devicertl_base_directory}/src)
-set(all_capabilities 35 37 50 52 53 60 61 62 70 72 75 80 86 89 90)
-
-set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${all_capabilities} CACHE STRING
- "List of CUDA Compute Capabilities to be used to compile the NVPTX DeviceRTL.")
-string(TOLOWER ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES} LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES)
-
-if (LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES STREQUAL "all")
- set(nvptx_sm_list ${all_capabilities})
-elseif(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES STREQUAL "auto")
- if (NOT LIBOMPTARGET_DEP_CUDA_FOUND)
- libomptarget_error_say("[NVPTX] Cannot auto detect compute capability as CUDA not found.")
- endif()
- set(nvptx_sm_list ${LIBOMPTARGET_DEP_CUDA_ARCH})
-else()
- string(REPLACE "," ";" nvptx_sm_list "${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES}")
-endif()
-
-# Check all SM values
-foreach(sm ${nvptx_sm_list})
- if (NOT ${sm} IN_LIST all_capabilities)
- libomptarget_warning_say("[NVPTX] Compute capability ${sm} is not supported. Make sure clang can work with it.")
+set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
+ "gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
+ "gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
+ "gfx1100;gfx1101;gfx1102;gfx1103")
+set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
+ "sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90")
+set(all_gpu_architectures
+ "${all_amdgpu_architectures};${all_nvptx_architectures}")
+
+set(LIBOMPTARGET_DEVICE_ARCHITECTURES "all" CACHE STRING
+ "List of device architectures to be used to compile the OpenMP DeviceRTL.")
+
+if(LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "all")
+ set(LIBOMPTARGET_DEVICE_ARCHITECTURES ${all_gpu_architectures})
+elseif(LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "auto")
+ if(NOT LIBOMPTARGET_NVPTX_ARCH AND NOT LIBOMPTARGET_AMDGPU_ARCH)
+ libomptarget_error_say(
+ "Could not find 'amdgpu-arch' and 'nvptx-arch' tools required for 'auto'")
+ elseif(NOT LIBOMPTARGET_FOUND_NVIDIA_GPU AND NOT LIBOMPTARGET_FOUND_AMDGPU_GPU)
+ libomptarget_error_say("No AMD or Nvidia found on the system when using 'auto'")
endif()
-endforeach()
-
-set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx90c gfx940 gfx1010 gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103)
-if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
- set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
+ set(LIBOMPTARGET_DEVICE_ARCHITECTURES
+ "${LIBOMPTARGET_NVPTX_DETECTED_ARCH_LIST};${LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST}")
endif()
set(include_files
@@ -272,15 +268,17 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
set_property(TARGET omptarget.devicertl.all_objs APPEND PROPERTY IMPORTED_OBJECTS ${output_name})
endfunction()
-# Generate a Bitcode library for all the compute capabilities the user requested
+# Generate a Bitcode library for all the gpu architectures the user requested.
add_custom_target(omptarget.devicertl.nvptx)
-foreach(sm ${nvptx_sm_list})
- compileDeviceRTLLibrary(sm_${sm} nvptx nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda --cuda-feature=+ptx61)
-endforeach()
-
add_custom_target(omptarget.devicertl.amdgpu)
-foreach(mcpu ${amdgpu_mcpus})
- compileDeviceRTLLibrary(${mcpu} amdgpu amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa)
+foreach(gpu_arch ${LIBOMPTARGET_DEVICE_ARCHITECTURES})
+ if("${gpu_arch}" IN_LIST all_amdgpu_architectures)
+ compileDeviceRTLLibrary(${gpu_arch} amdgpu amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa)
+ elseif("${gpu_arch}" IN_LIST all_nvptx_architectures)
+ compileDeviceRTLLibrary(${gpu_arch} nvptx nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda --cuda-feature=+ptx61)
+ else()
+ libomptarget_error_say("Unknown GPU architecture '${gpu_arch}'")
+ endif()
endforeach()
# Archive all the object files generated above into a static library
diff --git a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
index fb4935a61c565..1db8423ec06b4 100644
--- a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
+++ b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
@@ -115,12 +115,11 @@ if(LIBOMPTARGET_NVPTX_ARCH)
execute_process(COMMAND ${LIBOMPTARGET_NVPTX_ARCH}
OUTPUT_VARIABLE LIBOMPTARGET_NVPTX_ARCH_OUTPUT
OUTPUT_STRIP_TRAILING_WHITESPACE)
- string(FIND "${LIBOMPTARGET_NVPTX_ARCH_OUTPUT}" "\n" first_arch_string)
- string(SUBSTRING "${LIBOMPTARGET_NVPTX_ARCH_OUTPUT}" 0 ${first_arch_string}
- arch_string)
- if(arch_string)
+ string(REPLACE "\n" ";" nvptx_arch_list "${LIBOMPTARGET_NVPTX_ARCH_OUTPUT}")
+ if(nvptx_arch_list)
set(LIBOMPTARGET_FOUND_NVIDIA_GPU TRUE)
- set(LIBOMPTARGET_DEP_CUDA_ARCH "${arch_string}")
+ set(LIBOMPTARGET_NVPTX_DETECTED_ARCH_LIST "${nvptx_arch_list}")
+ list(GET nvptx_arch_list 0 LIBOMPTARGET_DEP_CUDA_ARCH)
endif()
endif()
@@ -134,12 +133,10 @@ if(LIBOMPTARGET_AMDGPU_ARCH)
execute_process(COMMAND ${LIBOMPTARGET_AMDGPU_ARCH}
OUTPUT_VARIABLE LIBOMPTARGET_AMDGPU_ARCH_OUTPUT
OUTPUT_STRIP_TRAILING_WHITESPACE)
- string(FIND "${LIBOMPTARGET_AMDGPU_ARCH_OUTPUT}" "\n" first_arch_string)
- string(SUBSTRING "${LIBOMPTARGET_AMDGPU_ARCH_OUTPUT}" 0 ${first_arch_string}
- arch_string)
- if(arch_string)
+ string(REPLACE "\n" ";" amdgpu_arch_list "${LIBOMPTARGET_AMDGPU_ARCH_OUTPUT}")
+ if(amdgpu_arch_list)
set(LIBOMPTARGET_FOUND_AMDGPU_GPU TRUE)
- set(LIBOMPTARGET_DEP_AMDGPU_ARCH "${arch_string}")
+ set(LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST "${amdgpu_arch_list}")
endif()
endif()
More information about the Openmp-commits
mailing list