[compiler-rt] dad7442 - [compiler-rt] Initial support for builtins on GPU targets (#95304)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 10 13:07:14 PDT 2024
Author: Joseph Huber
Date: 2024-07-10T15:07:11-05:00
New Revision: dad7442aff5c85ff9141b0d0f231bcd731cbadc6
URL: https://github.com/llvm/llvm-project/commit/dad7442aff5c85ff9141b0d0f231bcd731cbadc6
DIFF: https://github.com/llvm/llvm-project/commit/dad7442aff5c85ff9141b0d0f231bcd731cbadc6.diff
LOG: [compiler-rt] Initial support for builtins on GPU targets (#95304)
Summary:
This patch adds initial support to build the `builtins` library for GPU
targets. Primarily this requires adding a few new architectures for
`amdgcn` and `nvptx64`. I built this using the following invocations.
```console
$ cmake ../compiler-rt -DCMAKE_C_COMPILER=clang
-DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -GNinja
-DCMAKE_C_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa>
-DCMAKE_CXX_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa>
-DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1
-DLLVM_CMAKE_DIR=../cmake/Modules -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON
-C ../compiler-rt/cmake/caches/GPU.cmake
```
Some pointers would be appreciated for how to test this using a standard
(non-default target only) build.
GPU builds are somewhat finnicky. We only expect this to be built with a
sufficiently new clang, as it's the only compiler that supports the
target and output we distribute. Distribution is done as LLVM-IR blobs
for now.
GPUs have little backward compatibility, so linking object files is
left to a future patch.
More work is necessary to build correctly for all targets and ship into
the correct clang resource directory. Additionally we need to use the
`libc` project's support for running unit tests.
Added:
compiler-rt/cmake/caches/GPU.cmake
Modified:
compiler-rt/cmake/Modules/AddCompilerRT.cmake
compiler-rt/cmake/Modules/BuiltinTests.cmake
compiler-rt/cmake/Modules/CompilerRTUtils.cmake
compiler-rt/cmake/base-config-ix.cmake
compiler-rt/cmake/builtin-config-ix.cmake
compiler-rt/lib/builtins/CMakeLists.txt
Removed:
################################################################################
diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
index 298093462f80b..6962b733733a6 100644
--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
+++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
@@ -175,8 +175,9 @@ function(add_compiler_rt_runtime name type)
${ARGN})
set(libnames)
# Until we support this some other way, build compiler-rt runtime without LTO
- # to allow non-LTO projects to link with it.
- if(COMPILER_RT_HAS_FNO_LTO_FLAG)
+ # to allow non-LTO projects to link with it. GPU targets can currently only be
+ # distributed as LLVM-IR and ignore this.
+ if(COMPILER_RT_HAS_FNO_LTO_FLAG AND NOT COMPILER_RT_GPU_BUILD)
set(NO_LTO_FLAGS "-fno-lto")
else()
set(NO_LTO_FLAGS "")
diff --git a/compiler-rt/cmake/Modules/BuiltinTests.cmake b/compiler-rt/cmake/Modules/BuiltinTests.cmake
index 7d71ca3f71efd..63c5f47cb5010 100644
--- a/compiler-rt/cmake/Modules/BuiltinTests.cmake
+++ b/compiler-rt/cmake/Modules/BuiltinTests.cmake
@@ -112,7 +112,7 @@ endfunction()
function(builtin_check_c_compiler_flag flag output)
if(NOT DEFINED ${output})
message(STATUS "Performing Test ${output}")
- try_compile_only(result FLAGS ${flag})
+ try_compile_only(result FLAGS ${flag} ${CMAKE_REQUIRED_FLAGS})
set(${output} ${result} CACHE INTERNAL "Compiler supports ${flag}")
if(${result})
message(STATUS "Performing Test ${output} - Success")
diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
index cec7af929fb2b..58b02fe48cd8f 100644
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -146,6 +146,7 @@ macro(test_target_arch arch def)
endmacro()
macro(detect_target_arch)
+ check_symbol_exists(__AMDGPU__ "" __AMDGPU)
check_symbol_exists(__arm__ "" __ARM)
check_symbol_exists(__AVR__ "" __AVR)
check_symbol_exists(__aarch64__ "" __AARCH64)
@@ -154,6 +155,7 @@ macro(detect_target_arch)
check_symbol_exists(__loongarch__ "" __LOONGARCH)
check_symbol_exists(__mips__ "" __MIPS)
check_symbol_exists(__mips64__ "" __MIPS64)
+ check_symbol_exists(__NVPTX__ "" __NVPTX)
check_symbol_exists(__powerpc__ "" __PPC)
check_symbol_exists(__powerpc64__ "" __PPC64)
check_symbol_exists(__powerpc64le__ "" __PPC64LE)
@@ -164,7 +166,9 @@ macro(detect_target_arch)
check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
check_symbol_exists(__ve__ "" __VE)
- if(__ARM)
+ if(__AMDGPU)
+ add_default_target_arch(amdgcn)
+ elseif(__ARM)
add_default_target_arch(arm)
elseif(__AVR)
add_default_target_arch(avr)
@@ -192,6 +196,8 @@ macro(detect_target_arch)
add_default_target_arch(mips64)
elseif(__MIPS)
add_default_target_arch(mips)
+ elseif(__NVPTX)
+ add_default_target_arch(nvptx64)
elseif(__PPC64) # must be checked before __PPC
add_default_target_arch(powerpc64)
elseif(__PPC64LE)
@@ -397,6 +403,21 @@ macro(construct_compiler_rt_default_triple)
set(COMPILER_RT_DEFAULT_TARGET_ARCH "i386")
endif()
+ # If we are directly targeting a GPU we need to check that the compiler is
+ # compatible and pass some default arguments.
+ if(COMPILER_RT_DEFAULT_TARGET_ONLY)
+
+ # Pass the necessary flags to make flag detection work.
+ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+ set(COMPILER_RT_GPU_BUILD ON)
+ set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib")
+ elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
+ set(COMPILER_RT_GPU_BUILD ON)
+ set(CMAKE_REQUIRED_FLAGS
+ "${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument")
+ endif()
+ endif()
+
# Determine if test target triple is specified explicitly, and doesn't match the
# default.
if(NOT COMPILER_RT_DEFAULT_TARGET_TRIPLE STREQUAL LLVM_TARGET_TRIPLE)
@@ -475,6 +496,10 @@ function(get_compiler_rt_target arch variable)
endif()
endif()
set(target "${arch}${triple_suffix}")
+ elseif("${arch}" MATCHES "^amdgcn")
+ set(target "amdgcn-amd-amdhsa")
+ elseif("${arch}" MATCHES "^nvptx")
+ set(target "nvptx64-nvidia-cuda")
else()
set(target "${arch}${triple_suffix}")
endif()
diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
index 87137b3d111b6..2d6342b28ea31 100644
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -214,6 +214,10 @@ macro(test_targets)
test_target_arch(x86_64 "" "")
endif()
endif()
+ elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+ test_target_arch(amdgcn "" "--target=amdgcn-amd-amdhsa" "-nogpulib"
+ "-flto" "-fconvergent-functions"
+ "-Xclang -mcode-object-version=none")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64")
test_target_arch(loongarch64 "" "")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc64le|ppc64le")
@@ -254,6 +258,9 @@ macro(test_targets)
test_target_arch(mips "" "-mips32r2" "-mabi=32" "-D_LARGEFILE_SOURCE=1" "-D_FILE_OFFSET_BITS=64")
test_target_arch(mips64 "" "-mips64r2" "-mabi=64")
endif()
+ elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
+ test_target_arch(nvptx64 "" "--nvptx64-nvidia-cuda" "-nogpulib" "-flto"
+ "-fconvergent-functions" "-c")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
if(WIN32)
test_target_arch(arm "" "" "")
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 33c97b1ac28af..1f63e158409ca 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -19,6 +19,10 @@ builtin_check_c_compiler_flag(-fno-profile-generate COMPILER_RT_HAS_FNO_PROFILE_
builtin_check_c_compiler_flag(-fno-profile-instr-generate COMPILER_RT_HAS_FNO_PROFILE_INSTR_GENERATE_FLAG)
builtin_check_c_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR_USE_FLAG)
builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC)
+builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
+builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
+builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
+builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)
@@ -52,6 +56,7 @@ else()
set(OS_NAME "${CMAKE_SYSTEM_NAME}")
endif()
+set(AMDGPU amdgcn)
set(ARM64 aarch64)
set(ARM32 arm armhf armv4t armv5te armv6 armv6m armv7m armv7em armv7 armv7s armv7k armv8m.base armv8m.main armv8.1m.main)
set(AVR avr)
@@ -61,6 +66,7 @@ set(X86_64 x86_64)
set(LOONGARCH64 loongarch64)
set(MIPS32 mips mipsel)
set(MIPS64 mips64 mips64el)
+set(NVPTX nvptx64)
set(PPC32 powerpc powerpcspe)
set(PPC64 powerpc64 powerpc64le)
set(RISCV32 riscv32)
@@ -78,8 +84,8 @@ if(APPLE)
endif()
set(ALL_BUILTIN_SUPPORTED_ARCH
- ${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR}
- ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
+ ${X86} ${X86_64} ${AMDGPU} ${ARM32} ${ARM64} ${AVR}
+ ${HEXAGON} ${MIPS32} ${MIPS64} ${NVPTX} ${PPC32} ${PPC64}
${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
${WASM32} ${WASM64} ${VE} ${LOONGARCH64})
@@ -245,7 +251,8 @@ else()
${ALL_BUILTIN_SUPPORTED_ARCH})
endif()
-if (OS_NAME MATCHES "Linux|SerenityOS" AND NOT LLVM_USE_SANITIZER)
+if(OS_NAME MATCHES "Linux|SerenityOS" AND NOT LLVM_USE_SANITIZER AND NOT
+ COMPILER_RT_GPU_BUILD)
set(COMPILER_RT_HAS_CRT TRUE)
else()
set(COMPILER_RT_HAS_CRT FALSE)
diff --git a/compiler-rt/cmake/caches/GPU.cmake b/compiler-rt/cmake/caches/GPU.cmake
new file mode 100644
index 0000000000000..e448774cf1457
--- /dev/null
+++ b/compiler-rt/cmake/caches/GPU.cmake
@@ -0,0 +1,18 @@
+# This file sets up a CMakeCache for GPU builds of compiler-rt. This supports
+# amdgcn and nvptx builds targeting the builtins library.
+
+set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "")
+set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "")
+
+set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
+set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "")
+set(COMPILER_RT_BUILD_CRT OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_XRAY_NO_PREINIT OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_SCUDO_SANTDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index ba81c78f16608..f780b917cbdb1 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -228,7 +228,7 @@ option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
"Skip the atomic builtin (these should normally be provided by a shared library)"
On)
-if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
+if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD AND NOT COMPILER_RT_GPU_BUILD)
set(GENERIC_SOURCES
${GENERIC_SOURCES}
emutls.c
@@ -627,6 +627,8 @@ if (MINGW)
)
endif()
+set(amdgcn_SOURCES ${GENERIC_SOURCES})
+
set(armv4t_SOURCES ${arm_min_SOURCES})
set(armv5te_SOURCES ${arm_min_SOURCES})
set(armv6_SOURCES ${arm_min_SOURCES})
@@ -706,6 +708,8 @@ set(mips64_SOURCES ${GENERIC_TF_SOURCES}
set(mips64el_SOURCES ${GENERIC_TF_SOURCES}
${mips_SOURCES})
+set(nvptx64_SOURCES ${GENERIC_SOURCES})
+
set(powerpc_SOURCES ${GENERIC_SOURCES})
set(powerpcspe_SOURCES ${GENERIC_SOURCES})
@@ -811,6 +815,21 @@ else ()
endif()
endif()
+ # Directly targeting the GPU requires a few extra flags.
+ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
+ append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding BUILTIN_CFLAGS)
+ append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib BUILTIN_CFLAGS)
+ append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS)
+ append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
+ -fconvergent-functions BUILTIN_CFLAGS)
+
+ # AMDGPU targets want to use a generic ABI.
+ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+ append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
+ "SHELL:-Xclang -mcode-object-version=none" BUILTIN_CFLAGS)
+ endif()
+ endif()
+
set(BUILTIN_DEFS "")
if(COMPILER_RT_BUILTINS_HIDE_SYMBOLS)
More information about the llvm-commits
mailing list