[compiler-rt] dad7442 - [compiler-rt] Initial support for builtins on GPU targets (#95304)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 10 13:07:14 PDT 2024


Author: Joseph Huber
Date: 2024-07-10T15:07:11-05:00
New Revision: dad7442aff5c85ff9141b0d0f231bcd731cbadc6

URL: https://github.com/llvm/llvm-project/commit/dad7442aff5c85ff9141b0d0f231bcd731cbadc6
DIFF: https://github.com/llvm/llvm-project/commit/dad7442aff5c85ff9141b0d0f231bcd731cbadc6.diff

LOG: [compiler-rt] Initial support for builtins on GPU targets (#95304)

Summary:
This patch adds initial support to build the `builtins` library for GPU
targets. Primarily this requires adding a few new architectures for
`amdgcn` and `nvptx64`. I built this using the following invocations.

```console
$ cmake ../compiler-rt -DCMAKE_C_COMPILER=clang
  -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -GNinja
  -DCMAKE_C_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa>
  -DCMAKE_CXX_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa>
  -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1
  -DLLVM_CMAKE_DIR=../cmake/Modules -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON
  -C ../compiler-rt/cmake/caches/GPU.cmake
```

Some pointers would be appreciated for how to test this using a standard
(non-default target only) build.

GPU builds are somewhat finicky. We only expect this to be built with a
sufficiently new clang, as it's the only compiler that supports the
target and output we distribute. Distribution is done as LLVM-IR blobs
for now.
GPUs have little backward compatibility, so linking object files is
left to a future patch.

More work is necessary to build correctly for all targets and ship into
the correct clang resource directory. Additionally we need to use the
`libc` project's support for running unit tests.

Added: 
    compiler-rt/cmake/caches/GPU.cmake

Modified: 
    compiler-rt/cmake/Modules/AddCompilerRT.cmake
    compiler-rt/cmake/Modules/BuiltinTests.cmake
    compiler-rt/cmake/Modules/CompilerRTUtils.cmake
    compiler-rt/cmake/base-config-ix.cmake
    compiler-rt/cmake/builtin-config-ix.cmake
    compiler-rt/lib/builtins/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
index 298093462f80b..6962b733733a6 100644
--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
+++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
@@ -175,8 +175,9 @@ function(add_compiler_rt_runtime name type)
     ${ARGN})
   set(libnames)
   # Until we support this some other way, build compiler-rt runtime without LTO
-  # to allow non-LTO projects to link with it.
-  if(COMPILER_RT_HAS_FNO_LTO_FLAG)
+  # to allow non-LTO projects to link with it. GPU targets can currently only be
+  # distributed as LLVM-IR and ignore this.
+  if(COMPILER_RT_HAS_FNO_LTO_FLAG AND NOT COMPILER_RT_GPU_BUILD)
     set(NO_LTO_FLAGS "-fno-lto")
   else()
     set(NO_LTO_FLAGS "")

diff  --git a/compiler-rt/cmake/Modules/BuiltinTests.cmake b/compiler-rt/cmake/Modules/BuiltinTests.cmake
index 7d71ca3f71efd..63c5f47cb5010 100644
--- a/compiler-rt/cmake/Modules/BuiltinTests.cmake
+++ b/compiler-rt/cmake/Modules/BuiltinTests.cmake
@@ -112,7 +112,7 @@ endfunction()
 function(builtin_check_c_compiler_flag flag output)
   if(NOT DEFINED ${output})
     message(STATUS "Performing Test ${output}")
-    try_compile_only(result FLAGS ${flag})
+    try_compile_only(result FLAGS ${flag} ${CMAKE_REQUIRED_FLAGS})
     set(${output} ${result} CACHE INTERNAL "Compiler supports ${flag}")
     if(${result})
       message(STATUS "Performing Test ${output} - Success")

diff  --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
index cec7af929fb2b..58b02fe48cd8f 100644
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -146,6 +146,7 @@ macro(test_target_arch arch def)
 endmacro()
 
 macro(detect_target_arch)
+  check_symbol_exists(__AMDGPU__ "" __AMDGPU)
   check_symbol_exists(__arm__ "" __ARM)
   check_symbol_exists(__AVR__ "" __AVR)
   check_symbol_exists(__aarch64__ "" __AARCH64)
@@ -154,6 +155,7 @@ macro(detect_target_arch)
   check_symbol_exists(__loongarch__ "" __LOONGARCH)
   check_symbol_exists(__mips__ "" __MIPS)
   check_symbol_exists(__mips64__ "" __MIPS64)
+  check_symbol_exists(__NVPTX__ "" __NVPTX)
   check_symbol_exists(__powerpc__ "" __PPC)
   check_symbol_exists(__powerpc64__ "" __PPC64)
   check_symbol_exists(__powerpc64le__ "" __PPC64LE)
@@ -164,7 +166,9 @@ macro(detect_target_arch)
   check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
   check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
   check_symbol_exists(__ve__ "" __VE)
-  if(__ARM)
+  if(__AMDGPU)
+    add_default_target_arch(amdgcn)
+  elseif(__ARM)
     add_default_target_arch(arm)
   elseif(__AVR)
     add_default_target_arch(avr)
@@ -192,6 +196,8 @@ macro(detect_target_arch)
     add_default_target_arch(mips64)
   elseif(__MIPS)
     add_default_target_arch(mips)
+  elseif(__NVPTX)
+    add_default_target_arch(nvptx64)
   elseif(__PPC64) # must be checked before __PPC
     add_default_target_arch(powerpc64)
   elseif(__PPC64LE)
@@ -397,6 +403,21 @@ macro(construct_compiler_rt_default_triple)
     set(COMPILER_RT_DEFAULT_TARGET_ARCH "i386")
   endif()
 
+  # If we are directly targeting a GPU we need to check that the compiler is
+  # compatible and pass some default arguments.
+  if(COMPILER_RT_DEFAULT_TARGET_ONLY)
+
+    # Pass the necessary flags to make flag detection work.
+    if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+      set(COMPILER_RT_GPU_BUILD ON)
+      set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib")
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
+      set(COMPILER_RT_GPU_BUILD ON)
+      set(CMAKE_REQUIRED_FLAGS
+          "${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument")
+    endif()
+  endif()
+
   # Determine if test target triple is specified explicitly, and doesn't match the
   # default.
   if(NOT COMPILER_RT_DEFAULT_TARGET_TRIPLE STREQUAL LLVM_TARGET_TRIPLE)
@@ -475,6 +496,10 @@ function(get_compiler_rt_target arch variable)
       endif()
     endif()
     set(target "${arch}${triple_suffix}")
+  elseif("${arch}" MATCHES "^amdgcn")
+    set(target "amdgcn-amd-amdhsa")
+  elseif("${arch}" MATCHES "^nvptx")
+    set(target "nvptx64-nvidia-cuda")
   else()
     set(target "${arch}${triple_suffix}")
   endif()

diff  --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
index 87137b3d111b6..2d6342b28ea31 100644
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -214,6 +214,10 @@ macro(test_targets)
           test_target_arch(x86_64 "" "")
         endif()
       endif()
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+      test_target_arch(amdgcn "" "--target=amdgcn-amd-amdhsa" "-nogpulib"
+                       "-flto" "-fconvergent-functions"
+                       "-Xclang -mcode-object-version=none")
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64")
       test_target_arch(loongarch64 "" "")
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc64le|ppc64le")
@@ -254,6 +258,9 @@ macro(test_targets)
         test_target_arch(mips "" "-mips32r2" "-mabi=32" "-D_LARGEFILE_SOURCE=1" "-D_FILE_OFFSET_BITS=64")
         test_target_arch(mips64 "" "-mips64r2" "-mabi=64")
       endif()
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
+      test_target_arch(nvptx64 "" "--target=nvptx64-nvidia-cuda" "-nogpulib"
+                       "-flto" "-fconvergent-functions" "-c")
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
       if(WIN32)
         test_target_arch(arm "" "" "")

diff  --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 33c97b1ac28af..1f63e158409ca 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -19,6 +19,10 @@ builtin_check_c_compiler_flag(-fno-profile-generate COMPILER_RT_HAS_FNO_PROFILE_
 builtin_check_c_compiler_flag(-fno-profile-instr-generate COMPILER_RT_HAS_FNO_PROFILE_INSTR_GENERATE_FLAG)
 builtin_check_c_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR_USE_FLAG)
 builtin_check_c_compiler_flag(-Wno-pedantic         COMPILER_RT_HAS_WNO_PEDANTIC)
+builtin_check_c_compiler_flag(-nogpulib             COMPILER_RT_HAS_NOGPULIB_FLAG)
+builtin_check_c_compiler_flag(-flto                 COMPILER_RT_HAS_FLTO_FLAG)
+builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
+builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
 builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
 builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)
 
@@ -52,6 +56,7 @@ else()
   set(OS_NAME "${CMAKE_SYSTEM_NAME}")
 endif()
 
+set(AMDGPU amdgcn)
 set(ARM64 aarch64)
 set(ARM32 arm armhf armv4t armv5te armv6 armv6m armv7m armv7em armv7 armv7s armv7k armv8m.base armv8m.main armv8.1m.main)
 set(AVR avr)
@@ -61,6 +66,7 @@ set(X86_64 x86_64)
 set(LOONGARCH64 loongarch64)
 set(MIPS32 mips mipsel)
 set(MIPS64 mips64 mips64el)
+set(NVPTX nvptx64)
 set(PPC32 powerpc powerpcspe)
 set(PPC64 powerpc64 powerpc64le)
 set(RISCV32 riscv32)
@@ -78,8 +84,8 @@ if(APPLE)
 endif()
 
 set(ALL_BUILTIN_SUPPORTED_ARCH
-  ${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR}
-  ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
+  ${X86} ${X86_64} ${AMDGPU} ${ARM32} ${ARM64} ${AVR}
+  ${HEXAGON} ${MIPS32} ${MIPS64} ${NVPTX} ${PPC32} ${PPC64}
   ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
   ${WASM32} ${WASM64} ${VE} ${LOONGARCH64})
 
@@ -245,7 +251,8 @@ else()
     ${ALL_BUILTIN_SUPPORTED_ARCH})
 endif()
 
-if (OS_NAME MATCHES "Linux|SerenityOS" AND NOT LLVM_USE_SANITIZER)
+if(OS_NAME MATCHES "Linux|SerenityOS" AND NOT LLVM_USE_SANITIZER AND NOT
+   COMPILER_RT_GPU_BUILD)
   set(COMPILER_RT_HAS_CRT TRUE)
 else()
   set(COMPILER_RT_HAS_CRT FALSE)

diff  --git a/compiler-rt/cmake/caches/GPU.cmake b/compiler-rt/cmake/caches/GPU.cmake
new file mode 100644
index 0000000000000..e448774cf1457
--- /dev/null
+++ b/compiler-rt/cmake/caches/GPU.cmake
@@ -0,0 +1,18 @@
+# This file sets up a CMakeCache for GPU builds of compiler-rt. This supports
+# amdgcn and nvptx builds targeting the builtins library.
+
+set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "")
+set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "")
+
+set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
+set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "")
+set(COMPILER_RT_BUILD_CRT OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_XRAY_NO_PREINIT OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")

diff  --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index ba81c78f16608..f780b917cbdb1 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -228,7 +228,7 @@ option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
   "Skip the atomic builtin (these should normally be provided by a shared library)"
   On)
 
-if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
+if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD AND NOT COMPILER_RT_GPU_BUILD)
   set(GENERIC_SOURCES
     ${GENERIC_SOURCES}
     emutls.c
@@ -627,6 +627,8 @@ if (MINGW)
   )
 endif()
 
+set(amdgcn_SOURCES ${GENERIC_SOURCES})
+
 set(armv4t_SOURCES ${arm_min_SOURCES})
 set(armv5te_SOURCES ${arm_min_SOURCES})
 set(armv6_SOURCES ${arm_min_SOURCES})
@@ -706,6 +708,8 @@ set(mips64_SOURCES ${GENERIC_TF_SOURCES}
 set(mips64el_SOURCES ${GENERIC_TF_SOURCES}
                      ${mips_SOURCES})
 
+set(nvptx64_SOURCES ${GENERIC_SOURCES})
+
 set(powerpc_SOURCES ${GENERIC_SOURCES})
 
 set(powerpcspe_SOURCES ${GENERIC_SOURCES})
@@ -811,6 +815,21 @@ else ()
     endif()
   endif()
 
+  # Directly targeting the GPU requires a few extra flags.
+  if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
+    append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding BUILTIN_CFLAGS)
+    append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib BUILTIN_CFLAGS)
+    append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS)
+    append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
+                   -fconvergent-functions BUILTIN_CFLAGS)
+
+    # AMDGPU targets want to use a generic ABI.
+    if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+      append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
+                     "SHELL:-Xclang -mcode-object-version=none" BUILTIN_CFLAGS)
+    endif()
+  endif()
+
   set(BUILTIN_DEFS "")
 
   if(COMPILER_RT_BUILTINS_HIDE_SYMBOLS)


        


More information about the llvm-commits mailing list