[libc] [llvm] [libc] Build the GPU loader utilities during the projects setup like libc-hdrgen (PR #84667)

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 10 07:32:44 PDT 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/84667

From 0c155cf639203006816f68a3710461322980a855 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Sat, 9 Mar 2024 20:13:17 -0600
Subject: [PATCH 1/2] [libc] Only enable `LLVM_LIBC_FULL_BUILD` by default for
 GPU targets

Summary:
Currently we have a conditional that turns the full build on by default
when a GPU triple is one of the runtime targets. This used to work fine
when the GPU was the only target that was ever present. However, we
have recently changed to allow building multiple runtime targets at the
same time. That means we should be able to build overlay mode for the
CPU target and full build mode for the GPU targets in the same
configuration. This patch makes some simple adjustments to pass the
arguments per-triple. It slightly extends the existing `-DRUNTIMES_`
argument support to also transform any extra CMake inputs rather than
just the passed CMake variables.
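
As a sketch of the intended use (the exact project and target lists
here are illustrative), a single configure can now request the overlay
build for the host while the GPU triple's `RUNTIMES_<triple>_` options
are forwarded into its own runtimes configure:

  cmake -S llvm -B build -G Ninja \
    -DLLVM_ENABLE_PROJECTS="clang" \
    -DLLVM_ENABLE_RUNTIMES="libc" \
    -DLLVM_RUNTIME_TARGETS="default;amdgcn-amd-amdhsa" \
    -DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES="libc"

Here the default (host) target keeps the overlay mode, while seeing
`libc` in the amdgcn runtimes list turns on `LLVM_LIBC_GPU_BUILD` and
appends `-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_LIBC_FULL_BUILD=ON` for that
triple only.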
---
 llvm/CMakeLists.txt          | 17 +++++++--------
 llvm/runtimes/CMakeLists.txt | 42 +++++++++++++++++++++++++-----------
 2 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 111c8cfa15d828..d0e33c29be58f8 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -173,16 +173,15 @@ endforeach()
 set(NEED_LIBC_HDRGEN FALSE)
 if("libc" IN_LIST LLVM_ENABLE_RUNTIMES)
   set(NEED_LIBC_HDRGEN TRUE)
-else()
-  foreach(_name ${LLVM_RUNTIME_TARGETS})
-    if("libc" IN_LIST RUNTIMES_${_name}_LLVM_ENABLE_RUNTIMES)
-      set(NEED_LIBC_HDRGEN TRUE)
-      if("${_name}" STREQUAL "amdgcn-amd-amdhsa" OR "${_name}" STREQUAL "nvptx64-nvidia-cuda")
-        set(LLVM_LIBC_GPU_BUILD ON)
-      endif()
-    endif()
-  endforeach()
 endif()
+foreach(_name ${LLVM_RUNTIME_TARGETS})
+  if("libc" IN_LIST RUNTIMES_${_name}_LLVM_ENABLE_RUNTIMES)
+    set(NEED_LIBC_HDRGEN TRUE)
+    if("${_name}" STREQUAL "amdgcn-amd-amdhsa" OR "${_name}" STREQUAL "nvptx64-nvidia-cuda")
+      set(LLVM_LIBC_GPU_BUILD ON)
+    endif()
+  endif()
+endforeach()
 if(NEED_LIBC_HDRGEN)
   # To build the libc runtime, we need to be able to build few libc build
   # tools from the "libc" project. So, we add it to the list of enabled
diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index 9b5e758b6ede57..3f3c482adccd37 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -358,6 +358,14 @@ function(runtime_register_target name)
         endif()
       endif()
     endforeach()
+    foreach(variable_name ${${name}_extra_args})
+      string(FIND "${variable_name}" "-DRUNTIMES_${extra_name}_" out)
+      if("${out}" EQUAL 0)
+        string(REPLACE "-DRUNTIMES_${extra_name}_" "" new_name ${variable_name})
+        string(REPLACE ";" "|" new_value "${new_name}")
+        list(APPEND ${name}_extra_args "-D${new_value}")
+      endif()
+    endforeach()
   endforeach()
 
   set_enable_per_target_runtime_dir()
@@ -438,21 +446,29 @@ if(runtimes)
     if(NOT hdrgen_exe)
       message(FATAL_ERROR "libc-hdrgen executable missing")
     endif()
-    set(libc_cmake_args "-DLIBC_HDRGEN_EXE=${hdrgen_exe}"
-                        "-DLLVM_LIBC_FULL_BUILD=ON")
+    list(APPEND libc_cmake_args "-DLIBC_HDRGEN_EXE=${hdrgen_exe}")
     list(APPEND extra_deps ${hdrgen_deps})
-    if(LLVM_LIBC_GPU_BUILD)
-      list(APPEND libc_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON")
-      # The `libc` project may require '-DCUDAToolkit_ROOT' in GPU mode.
-      if(CUDAToolkit_ROOT)
-        list(APPEND libc_cmake_args "-DCUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
-      endif()
-      foreach(dep clang-offload-packager nvptx-arch amdgpu-arch)
-        if(TARGET ${dep})
-          list(APPEND extra_deps ${dep})
-        endif()
-      endforeach()
+  endif()
+  if(LLVM_LIBC_GPU_BUILD)
+    list(APPEND libc_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON")
+    if("libc" IN_LIST RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES)
+      list(APPEND libc_cmake_args "-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_LIBC_FULL_BUILD=ON")
     endif()
+    if("libc" IN_LIST RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES)
+      list(APPEND libc_cmake_args "-DRUNTIMES_nvptx64-nvidia-cuda_LLVM_LIBC_FULL_BUILD=ON")
+    endif()
+    # The `libc` project may require '-DCUDAToolkit_ROOT' in GPU mode.
+    if(CUDAToolkit_ROOT)
+      list(APPEND libc_cmake_args "-DCUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
+    endif()
+    foreach(dep clang-offload-packager nvptx-arch amdgpu-arch)
+      if(TARGET ${dep})
+        list(APPEND extra_deps ${dep})
+      endif()
+    endforeach()
+  endif()
+  if(LLVM_LIBC_FULL_BUILD)
+    list(APPEND libc_cmake_args "-DLLVM_LIBC_FULL_BUILD=ON")
   endif()
   if(NOT LLVM_RUNTIME_TARGETS)
     runtime_default_target(

From d6b09d36066c74590b4a21d354e33be090c720e7 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Sat, 9 Mar 2024 22:05:39 -0600
Subject: [PATCH 2/2] [libc] Build the GPU loader utilities during the projects
 setup like libc-hdrgen

Summary:
The libc build has a few utilities that need to be built before we can
do everything in the full build. The one requirement currently is the
`libc-hdrgen` binary. If we are doing a full build in runtimes mode, we
first add `libc` to the projects list and then use only the `projects`
portion to build the libc build tools. We also use utilities for the
GPU build, namely the loader utilities. Previously we would build these
tools on demand inside of the cross-build, which took some hacky
workarounds for the dependency finding and the target triple. This
patch instead builds them in the same way as libc-hdrgen and then
passes them in. We now either pass the loader manually if it was built,
or look it up like we do with the other `clang` tools.

Depends on https://github.com/llvm/llvm-project/pull/84664
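
As a sketch of the resulting flow (the loader path below is
hypothetical), a standalone GPU crossbuild can point the tests at a
prebuilt loader through the new cache variable instead of rebuilding it
inside the runtimes configure:

  cmake -S runtimes -B build-gpu -G Ninja \
    -DLLVM_ENABLE_RUNTIMES="libc" \
    -DLLVM_RUNTIMES_TARGET=amdgcn-amd-amdhsa \
    -DLIBC_GPU_LOADER_EXECUTABLE=/path/to/bin/amdhsa-loader

When the variable is unset, prepare_libc_gpu_build.cmake falls back to
a `find_program` lookup for amdhsa-loader or nvptx-loader in
${LLVM_BINARY_DIR}/bin or next to the compiler, matching how the other
`clang` tools are located.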
---
 libc/CMakeLists.txt                           |  9 ++--
 .../modules/prepare_libc_gpu_build.cmake      | 42 +++++++++++++++----
 libc/utils/CMakeLists.txt                     |  3 --
 libc/utils/gpu/CMakeLists.txt                 |  4 +-
 libc/utils/gpu/loader/CMakeLists.txt          | 32 +++-----------
 libc/utils/gpu/loader/amdgpu/CMakeLists.txt   |  1 -
 libc/utils/gpu/loader/nvptx/CMakeLists.txt    |  4 +-
 libc/utils/gpu/server/CMakeLists.txt          |  9 ----
 llvm/CMakeLists.txt                           |  4 ++
 llvm/runtimes/CMakeLists.txt                  | 18 +++++---
 10 files changed, 65 insertions(+), 61 deletions(-)

diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt
index b4a2523b778877..6edf5c656193db 100644
--- a/libc/CMakeLists.txt
+++ b/libc/CMakeLists.txt
@@ -60,6 +60,10 @@ if(LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD)
     message(STATUS "Will use ${LIBC_HDRGEN_EXE} for libc header generation.")
   endif()
 endif()
+# We will build the GPU utilities if we are not doing a runtimes build.
+if(LLVM_LIBC_GPU_BUILD AND NOT LLVM_RUNTIMES_BUILD)
+  add_subdirectory(utils/gpu)
+endif()
 
 set(NEED_LIBC_HDRGEN FALSE)
 if(NOT LLVM_RUNTIMES_BUILD)
@@ -79,11 +83,6 @@ if(LIBC_HDRGEN_ONLY OR NEED_LIBC_HDRGEN)
   # When libc is build as part of the runtimes/bootstrap build's CMake run, we
   # only need to build the host tools to build the libc. So, we just do enough
   # to build libc-hdrgen and return.
-
-  # Always make the RPC server availible to other projects for GPU mode.
-  if(LLVM_LIBC_GPU_BUILD)
-    add_subdirectory(utils/gpu/server)
-  endif()
   return()
 endif()
 unset(NEED_LIBC_HDRGEN)
diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake
index 2de4cb8d82b28b..bea6bb016491b6 100644
--- a/libc/cmake/modules/prepare_libc_gpu_build.cmake
+++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake
@@ -93,6 +93,41 @@ else()
 endif()
 set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}")
 
+# Identify the GPU loader utility used to run tests.
+set(LIBC_GPU_LOADER_EXECUTABLE "" CACHE STRING "Executable for the GPU loader.")
+if(LIBC_GPU_LOADER_EXECUTABLE)
+  set(gpu_loader_executable ${LIBC_GPU_LOADER_EXECUTABLE})
+elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+  find_program(LIBC_AMDHSA_LOADER_EXECUTABLE
+               NAMES amdhsa-loader NO_DEFAULT_PATH
+               PATHS ${LLVM_BINARY_DIR}/bin ${compiler_path})
+  if(LIBC_AMDHSA_LOADER_EXECUTABLE)
+    set(gpu_loader_executable ${LIBC_AMDHSA_LOADER_EXECUTABLE})
+  endif()
+elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
+  find_program(LIBC_NVPTX_LOADER_EXECUTABLE
+               NAMES nvptx-loader NO_DEFAULT_PATH
+               PATHS ${LLVM_BINARY_DIR}/bin ${compiler_path})
+  if(LIBC_NVPTX_LOADER_EXECUTABLE)
+    set(gpu_loader_executable ${LIBC_NVPTX_LOADER_EXECUTABLE})
+  endif()
+endif()
+if(NOT TARGET libc.utils.gpu.loader AND gpu_loader_executable)
+  add_custom_target(libc.utils.gpu.loader)
+  set_target_properties(
+    libc.utils.gpu.loader
+    PROPERTIES
+      EXECUTABLE "${gpu_loader_executable}"
+  )
+endif()
+
+if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+  # The AMDGPU environment uses different code objects to encode the ABI for
+  # kernel calls and intrinsic functions. We want to specify this manually to
+  # conform to whatever the test suite was built to handle.
+  set(LIBC_GPU_CODE_OBJECT_VERSION 5)
+endif()
+
 if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
   # FIXME: This is a hack required to keep the CUDA package from trying to find
   #        pthreads. We only link the CUDA driver, so this is unneeded.
@@ -103,10 +138,3 @@ if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
     get_filename_component(LIBC_CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
   endif()
 endif()
-
-if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
-  # The AMDGPU environment uses different code objects to encode the ABI for
-  # kernel calls and intrinsic functions. We want to specify this manually to
-  # conform to whatever the test suite was built to handle.
-  set(LIBC_GPU_CODE_OBJECT_VERSION 5)
-endif()
diff --git a/libc/utils/CMakeLists.txt b/libc/utils/CMakeLists.txt
index 7bf02a4af7deae..11f25503cc13e2 100644
--- a/libc/utils/CMakeLists.txt
+++ b/libc/utils/CMakeLists.txt
@@ -1,6 +1,3 @@
 if(LLVM_INCLUDE_TESTS)
   add_subdirectory(MPFRWrapper)
 endif()
-if(LIBC_TARGET_OS_IS_GPU)
-  add_subdirectory(gpu)
-endif()
diff --git a/libc/utils/gpu/CMakeLists.txt b/libc/utils/gpu/CMakeLists.txt
index 4d1ebcfb9f8e65..7c15f36052cf3b 100644
--- a/libc/utils/gpu/CMakeLists.txt
+++ b/libc/utils/gpu/CMakeLists.txt
@@ -1,4 +1,2 @@
 add_subdirectory(server)
-if(LIBC_TARGET_OS_IS_GPU)
-  add_subdirectory(loader)
-endif()
+add_subdirectory(loader)
diff --git a/libc/utils/gpu/loader/CMakeLists.txt b/libc/utils/gpu/loader/CMakeLists.txt
index 189460bb02e6e5..61252b07de0c47 100644
--- a/libc/utils/gpu/loader/CMakeLists.txt
+++ b/libc/utils/gpu/loader/CMakeLists.txt
@@ -6,37 +6,18 @@ target_include_directories(gpu_loader PUBLIC
   ${LIBC_SOURCE_DIR}
 )
 
-# This utility needs to be compiled for the host system when cross compiling.
-if(LLVM_RUNTIMES_TARGET OR LIBC_TARGET_TRIPLE)
-  target_compile_options(gpu_loader PUBLIC --target=${LLVM_HOST_TRIPLE})
-  target_link_libraries(gpu_loader PUBLIC "--target=${LLVM_HOST_TRIPLE}")
-endif()
-
 find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
-if(hsa-runtime64_FOUND AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+if(hsa-runtime64_FOUND)
   add_subdirectory(amdgpu)
-elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
-  message(STATUS "Skipping HSA loader for gpu target, no HSA was detected")
 endif()
 
 # The CUDA loader requires LLVM to traverse the ELF image for symbols.
-find_package(LLVM QUIET)
-if(CUDAToolkit_FOUND AND LLVM_FOUND AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
+find_package(CUDAToolkit 11.2 QUIET)
+if(CUDAToolkit_FOUND)
   add_subdirectory(nvptx)
-elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
-  message(STATUS "Skipping CUDA loader for gpu target, no CUDA was detected")
 endif()
 
-# Add a custom target to be used for testing.
-set(LIBC_GPU_LOADER_EXECUTABLE "" CACHE STRING "Overriding binary for the GPU loader.")
-if(LIBC_GPU_LOADER_EXECUTABLE)
-  add_custom_target(libc.utils.gpu.loader)
-  set_target_properties(
-    libc.utils.gpu.loader
-    PROPERTIES
-      EXECUTABLE "${LIBC_GPU_LOADER_EXECUTABLE}"
-  )
-elseif(TARGET amdhsa-loader AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+if(TARGET amdhsa-loader AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
   add_custom_target(libc.utils.gpu.loader)
   add_dependencies(libc.utils.gpu.loader amdhsa-loader)
   set_target_properties(
@@ -56,11 +37,10 @@ elseif(TARGET nvptx-loader AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
   )
 endif()
 
-if(TARGET libc.utils.gpu.loader)
-  get_target_property(gpu_loader_tgt libc.utils.gpu.loader "TARGET")
+foreach(gpu_loader_tgt amdhsa-loader nvptx-loader)
-  if(gpu_loader_tgt)
+  if(TARGET ${gpu_loader_tgt})
     install(TARGETS ${gpu_loader_tgt}
             DESTINATION ${CMAKE_INSTALL_BINDIR}
             COMPONENT libc)
   endif()
-endif()
+endforeach()
diff --git a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
index b99319f5040112..97a2de9f8379ab 100644
--- a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
+++ b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
@@ -1,5 +1,4 @@
 add_executable(amdhsa-loader Loader.cpp)
-add_dependencies(amdhsa-loader libc.src.__support.RPC.rpc)
 
 target_link_libraries(amdhsa-loader
   PRIVATE
diff --git a/libc/utils/gpu/loader/nvptx/CMakeLists.txt b/libc/utils/gpu/loader/nvptx/CMakeLists.txt
index e76362a1e8cca6..948493959badf2 100644
--- a/libc/utils/gpu/loader/nvptx/CMakeLists.txt
+++ b/libc/utils/gpu/loader/nvptx/CMakeLists.txt
@@ -1,10 +1,10 @@
 add_executable(nvptx-loader Loader.cpp)
-add_dependencies(nvptx-loader libc.src.__support.RPC.rpc)
 
 if(NOT LLVM_ENABLE_RTTI)
   target_compile_options(nvptx-loader PRIVATE -fno-rtti)
 endif()
-target_include_directories(nvptx-loader PRIVATE ${LLVM_INCLUDE_DIRS})
+target_include_directories(nvptx-loader PRIVATE
+                           ${LLVM_MAIN_INCLUDE_DIR} ${LLVM_BINARY_DIR}/include)
 target_link_libraries(nvptx-loader
   PRIVATE
   gpu_loader
diff --git a/libc/utils/gpu/server/CMakeLists.txt b/libc/utils/gpu/server/CMakeLists.txt
index 8712f24de84f7b..6930e145e58e62 100644
--- a/libc/utils/gpu/server/CMakeLists.txt
+++ b/libc/utils/gpu/server/CMakeLists.txt
@@ -5,21 +5,12 @@ target_include_directories(llvmlibc_rpc_server PRIVATE ${LIBC_SOURCE_DIR})
 target_include_directories(llvmlibc_rpc_server PUBLIC ${LIBC_SOURCE_DIR}/include)
 target_include_directories(llvmlibc_rpc_server PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
 
-
 # Ignore unsupported clang attributes if we're using GCC.
 target_compile_options(llvmlibc_rpc_server PUBLIC
                        $<$<CXX_COMPILER_ID:GNU>:-Wno-attributes>)
 target_compile_definitions(llvmlibc_rpc_server PUBLIC
                            LIBC_NAMESPACE=${LIBC_NAMESPACE})
 
-# This utility needs to be compiled for the host system when cross compiling.
-if(LLVM_RUNTIMES_TARGET OR LIBC_TARGET_TRIPLE)
-  target_compile_options(llvmlibc_rpc_server PUBLIC
-                         --target=${LLVM_HOST_TRIPLE})
-  target_link_libraries(llvmlibc_rpc_server PUBLIC
-                        "--target=${LLVM_HOST_TRIPLE}")
-endif()
-
 # Install the server and associated header.
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/llvmlibc_rpc_server.h
         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index d0e33c29be58f8..494d8abeb64d21 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -182,6 +182,10 @@ foreach(_name ${LLVM_RUNTIME_TARGETS})
     endif()
   endif()
 endforeach()
+if("${LIBC_TARGET_TRIPLE}" STREQUAL "amdgcn-amd-amdhsa" OR
+   "${LIBC_TARGET_TRIPLE}" STREQUAL "nvptx64-nvidia-cuda")
+  set(LLVM_LIBC_GPU_BUILD ON)
+endif()
 if(NEED_LIBC_HDRGEN)
   # To build the libc runtime, we need to be able to build few libc build
   # tools from the "libc" project. So, we add it to the list of enabled
diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index 3f3c482adccd37..54ec8bd28d4ca6 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -452,20 +452,28 @@ if(runtimes)
   if(LLVM_LIBC_GPU_BUILD)
     list(APPEND libc_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON")
     if("libc" IN_LIST RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES)
+      if(TARGET amdhsa-loader)
+        list(APPEND libc_cmake_args
+             "-DRUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:amdhsa-loader>")
+        list(APPEND extra_deps amdhsa-loader amdgpu-arch)
+      endif()
       list(APPEND libc_cmake_args "-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_LIBC_FULL_BUILD=ON")
     endif()
     if("libc" IN_LIST RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES)
+      if(TARGET nvptx-loader)
+        list(APPEND libc_cmake_args
+             "-DRUNTIMES_nvptx64-nvidia-cuda_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:nvptx-loader>")
+        list(APPEND extra_deps nvptx-loader nvptx-arch)
+      endif()
       list(APPEND libc_cmake_args "-DRUNTIMES_nvptx64-nvidia-cuda_LLVM_LIBC_FULL_BUILD=ON")
     endif()
     # The `libc` project may require '-DCUDAToolkit_ROOT' in GPU mode.
     if(CUDAToolkit_ROOT)
       list(APPEND libc_cmake_args "-DCUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
     endif()
-    foreach(dep clang-offload-packager nvptx-arch amdgpu-arch)
-      if(TARGET ${dep})
-        list(APPEND extra_deps ${dep})
-      endif()
-    endforeach()
+    if(TARGET clang-offload-packager)
+      list(APPEND extra_deps clang-offload-packager)
+    endif()
   endif()
   if(LLVM_LIBC_FULL_BUILD)
     list(APPEND libc_cmake_args "-DLLVM_LIBC_FULL_BUILD=ON")


