[compiler-rt] [compiler-rt] Initial support for builtins on GPU targets (PR #95304)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 14 08:00:59 PDT 2024
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/95304
>From e8ed01752e6f66cb6a1c4296c35a82544403d149 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 12 Jun 2024 15:15:23 -0500
Subject: [PATCH] [compiler-rt] Initial support for builtins on GPU targets
Summary:
This patch adds initial support to build the `builtins` library for GPU
targets. Primarily this requires adding a few new architectures for
`amdgcn` and `nvptx64`. I built this using the following invocations.
```console
$ cmake ../compiler-rt -DCMAKE_C_COMPILER=clang
-DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -GNinja
-DCMAKE_C_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa>
-DCMAKE_CXX_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa>
-DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1
-DLLVM_CMAKE_DIR=../cmake/Modules -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON
-C ../compiler-rt/cmake/caches/GPU.cmake
```
Some pointers would be appreciated for how to test this using a standard
(non-default target only) build.
GPU builds are somewhat finicky. We only expect this to be built with a
sufficiently new clang, as it's the only compiler that supports the
target and output we distribute. Distribution is done as LLVM-IR blobs.
GPUs have little backward compatibility, so linking object files is
difficult. However, this prevents us from calling these functions
post-LTO as they will have been optimized out. Another issue is the
CMake flag-querying functions: currently these fail on nvptx if you
don't have CUDA installed, because they want to use the `ptxas` and
`nvlink` binaries.
More work is necessary to build correctly for all targets and ship into
the correct clang resource directory. Additionally we need to use the
`libc` project's support for running unit tests.
---
compiler-rt/cmake/Modules/AddCompilerRT.cmake | 7 +++--
compiler-rt/cmake/Modules/BuiltinTests.cmake | 2 +-
.../cmake/Modules/CompilerRTUtils.cmake | 27 ++++++++++++++++++-
compiler-rt/cmake/base-config-ix.cmake | 7 +++++
compiler-rt/cmake/builtin-config-ix.cmake | 13 ++++++---
compiler-rt/cmake/caches/GPU.cmake | 18 +++++++++++++
compiler-rt/lib/builtins/CMakeLists.txt | 21 ++++++++++++++-
7 files changed, 87 insertions(+), 8 deletions(-)
create mode 100644 compiler-rt/cmake/caches/GPU.cmake
diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
index 298093462f80b..83d6c8d868e9b 100644
--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
+++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
@@ -175,8 +175,11 @@ function(add_compiler_rt_runtime name type)
${ARGN})
set(libnames)
# Until we support this some other way, build compiler-rt runtime without LTO
- # to allow non-LTO projects to link with it.
- if(COMPILER_RT_HAS_FNO_LTO_FLAG)
+ # to allow non-LTO projects to link with it. GPU targets can currently only be
+ # distributed as LLVM-IR and ignore this.
+ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
+ set(NO_LTO_FLAGS "")
+ elseif(COMPILER_RT_HAS_FNO_LTO_FLAG)
set(NO_LTO_FLAGS "-fno-lto")
else()
set(NO_LTO_FLAGS "")
diff --git a/compiler-rt/cmake/Modules/BuiltinTests.cmake b/compiler-rt/cmake/Modules/BuiltinTests.cmake
index 7d71ca3f71efd..63c5f47cb5010 100644
--- a/compiler-rt/cmake/Modules/BuiltinTests.cmake
+++ b/compiler-rt/cmake/Modules/BuiltinTests.cmake
@@ -112,7 +112,7 @@ endfunction()
function(builtin_check_c_compiler_flag flag output)
if(NOT DEFINED ${output})
message(STATUS "Performing Test ${output}")
- try_compile_only(result FLAGS ${flag})
+ try_compile_only(result FLAGS ${flag} ${CMAKE_REQUIRED_FLAGS})
set(${output} ${result} CACHE INTERNAL "Compiler supports ${flag}")
if(${result})
message(STATUS "Performing Test ${output} - Success")
diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
index 9c7fe64d0bd35..773f6c894eb2f 100644
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -146,6 +146,7 @@ macro(test_target_arch arch def)
endmacro()
macro(detect_target_arch)
+ check_symbol_exists(__AMDGPU__ "" __AMDGPU)
check_symbol_exists(__arm__ "" __ARM)
check_symbol_exists(__AVR__ "" __AVR)
check_symbol_exists(__aarch64__ "" __AARCH64)
@@ -154,6 +155,7 @@ macro(detect_target_arch)
check_symbol_exists(__loongarch__ "" __LOONGARCH)
check_symbol_exists(__mips__ "" __MIPS)
check_symbol_exists(__mips64__ "" __MIPS64)
+ check_symbol_exists(__NVPTX__ "" __NVPTX)
check_symbol_exists(__powerpc__ "" __PPC)
check_symbol_exists(__powerpc64__ "" __PPC64)
check_symbol_exists(__powerpc64le__ "" __PPC64LE)
@@ -164,7 +166,9 @@ macro(detect_target_arch)
check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
check_symbol_exists(__ve__ "" __VE)
- if(__ARM)
+ if(__AMDGPU)
+ add_default_target_arch(amdgcn)
+ elseif(__ARM)
add_default_target_arch(arm)
elseif(__AVR)
add_default_target_arch(avr)
@@ -192,6 +196,8 @@ macro(detect_target_arch)
add_default_target_arch(mips64)
elseif(__MIPS)
add_default_target_arch(mips)
+ elseif(__NVPTX)
+ add_default_target_arch(nvptx64)
elseif(__PPC64) # must be checked before __PPC
add_default_target_arch(powerpc64)
elseif(__PPC64LE)
@@ -388,6 +394,21 @@ macro(construct_compiler_rt_default_triple)
set(COMPILER_RT_DEFAULT_TARGET_ARCH "i386")
endif()
+ # If we are directly targeting a GPU we need to check that the compiler is
+ # compatible and pass some default arguments.
+ if(COMPILER_RT_DEFAULT_TARGET_ONLY)
+
+ # Pass the necessary flags to make flag detection work.
+ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+ set(COMPILER_RT_GPU_BUILD ON)
+ set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib")
+ elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
+ set(COMPILER_RT_GPU_BUILD ON)
+ set(CMAKE_REQUIRED_FLAGS
+ "${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument")
+ endif()
+ endif()
+
# Determine if test target triple is specified explicitly, and doesn't match the
# default.
if(NOT COMPILER_RT_DEFAULT_TARGET_TRIPLE STREQUAL LLVM_TARGET_TRIPLE)
@@ -466,6 +487,10 @@ function(get_compiler_rt_target arch variable)
endif()
endif()
set(target "${arch}${triple_suffix}")
+ elseif("${arch}" MATCHES "^amdgcn")
+ set(target "amdgcn-amd-amdhsa")
+ elseif("${arch}" MATCHES "^nvptx")
+ set(target "nvptx64-nvidia-cuda")
else()
set(target "${arch}${triple_suffix}")
endif()
diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
index 87137b3d111b6..2d6342b28ea31 100644
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -214,6 +214,10 @@ macro(test_targets)
test_target_arch(x86_64 "" "")
endif()
endif()
+ elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+ test_target_arch(amdgcn "" "--target=amdgcn-amd-amdhsa" "-nogpulib"
+ "-flto" "-fconvergent-functions"
+ "-Xclang -mcode-object-version=none")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64")
test_target_arch(loongarch64 "" "")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc64le|ppc64le")
@@ -254,6 +258,9 @@ macro(test_targets)
test_target_arch(mips "" "-mips32r2" "-mabi=32" "-D_LARGEFILE_SOURCE=1" "-D_FILE_OFFSET_BITS=64")
test_target_arch(mips64 "" "-mips64r2" "-mabi=64")
endif()
+ elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
+ test_target_arch(nvptx64 "" "--target=nvptx64-nvidia-cuda" "-nogpulib" "-flto"
+ "-fconvergent-functions" "-c")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
if(WIN32)
test_target_arch(arm "" "" "")
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 33c97b1ac28af..1f63e158409ca 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -19,6 +19,10 @@ builtin_check_c_compiler_flag(-fno-profile-generate COMPILER_RT_HAS_FNO_PROFILE_
builtin_check_c_compiler_flag(-fno-profile-instr-generate COMPILER_RT_HAS_FNO_PROFILE_INSTR_GENERATE_FLAG)
builtin_check_c_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR_USE_FLAG)
builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC)
+builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
+builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
+builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
+builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)
@@ -52,6 +56,7 @@ else()
set(OS_NAME "${CMAKE_SYSTEM_NAME}")
endif()
+set(AMDGPU amdgcn)
set(ARM64 aarch64)
set(ARM32 arm armhf armv4t armv5te armv6 armv6m armv7m armv7em armv7 armv7s armv7k armv8m.base armv8m.main armv8.1m.main)
set(AVR avr)
@@ -61,6 +66,7 @@ set(X86_64 x86_64)
set(LOONGARCH64 loongarch64)
set(MIPS32 mips mipsel)
set(MIPS64 mips64 mips64el)
+set(NVPTX nvptx64)
set(PPC32 powerpc powerpcspe)
set(PPC64 powerpc64 powerpc64le)
set(RISCV32 riscv32)
@@ -78,8 +84,8 @@ if(APPLE)
endif()
set(ALL_BUILTIN_SUPPORTED_ARCH
- ${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR}
- ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
+ ${X86} ${X86_64} ${AMDGPU} ${ARM32} ${ARM64} ${AVR}
+ ${HEXAGON} ${MIPS32} ${MIPS64} ${NVPTX} ${PPC32} ${PPC64}
${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
${WASM32} ${WASM64} ${VE} ${LOONGARCH64})
@@ -245,7 +251,8 @@ else()
${ALL_BUILTIN_SUPPORTED_ARCH})
endif()
-if (OS_NAME MATCHES "Linux|SerenityOS" AND NOT LLVM_USE_SANITIZER)
+if(OS_NAME MATCHES "Linux|SerenityOS" AND NOT LLVM_USE_SANITIZER AND NOT
+ COMPILER_RT_GPU_BUILD)
set(COMPILER_RT_HAS_CRT TRUE)
else()
set(COMPILER_RT_HAS_CRT FALSE)
diff --git a/compiler-rt/cmake/caches/GPU.cmake b/compiler-rt/cmake/caches/GPU.cmake
new file mode 100644
index 0000000000000..e448774cf1457
--- /dev/null
+++ b/compiler-rt/cmake/caches/GPU.cmake
@@ -0,0 +1,18 @@
+# This file sets up a CMakeCache for GPU builds of compiler-rt. This supports
+# amdgcn and nvptx builds targeting the builtins library.
+
+set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "")
+set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "")
+
+set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
+set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "")
+set(COMPILER_RT_BUILD_CRT OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_XRAY_NO_PREINIT OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 6778ae1c35263..52e4221a5dc4d 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -228,7 +228,7 @@ option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
"Skip the atomic builtin (these should normally be provided by a shared library)"
On)
-if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
+if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD AND NOT COMPILER_RT_GPU_BUILD)
set(GENERIC_SOURCES
${GENERIC_SOURCES}
emutls.c
@@ -619,6 +619,8 @@ if (MINGW)
)
endif()
+set(amdgcn_SOURCES ${GENERIC_SOURCES})
+
set(armv4t_SOURCES ${arm_min_SOURCES})
set(armv5te_SOURCES ${arm_min_SOURCES})
set(armv6_SOURCES ${arm_min_SOURCES})
@@ -698,6 +700,8 @@ set(mips64_SOURCES ${GENERIC_TF_SOURCES}
set(mips64el_SOURCES ${GENERIC_TF_SOURCES}
${mips_SOURCES})
+set(nvptx64_SOURCES ${GENERIC_SOURCES})
+
set(powerpc_SOURCES ${GENERIC_SOURCES})
set(powerpcspe_SOURCES ${GENERIC_SOURCES})
@@ -803,6 +807,21 @@ else ()
endif()
endif()
+ # Directly targeting the GPU requires a few extra flags.
+ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
+ append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding BUILTIN_CFLAGS)
+ append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib BUILTIN_CFLAGS)
+ append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS)
+ append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
+ -fconvergent-functions BUILTIN_CFLAGS)
+
+ # AMDGPU targets want to use a generic ABI.
+ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+ append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
+ "SHELL:-Xclang -mcode-object-version=none" BUILTIN_CFLAGS)
+ endif()
+ endif()
+
set(BUILTIN_DEFS "")
if(COMPILER_RT_BUILTINS_HIDE_SYMBOLS)
More information about the llvm-commits
mailing list