[libc-commits] [libc] [llvm] [libc] Build the GPU during the projects setup like libc-hdrgen (PR #84667)

Mon Mar 11 07:06:50 PDT 2024

https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/84667

>From 8441da3efbdb68dcab45c4444a95a36c754956a7 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Sat, 9 Mar 2024 22:05:39 -0600
Subject: [PATCH] [libc] Build the GPU during the projects setup like
 libc-hdrgen

Summary:
The libc build has a few utilties that need to be built before we can do
everything in the full build. The one requirement currently is the
`libc-hdrgen` binary. If we are doing a full build runtimes mode we
first add `libc` to the projects list and then only use the `projects`
portion to buld the `libc` portion. We also use utilities for the GPU
build, namely the loader utilities. Previously we would build these
tools on-demand inside of the cross-build, which tool some hacky
workarounds for the dependency finding and target triple. This patch
instead just builds them similarly to libc-hdrgen and then passses them
in. We now either pass it manually it it was built, or just look it up
like we do with the other `clang` tools.

Depends on https://github.com/llvm/llvm-project/pull/84664
---
 libc/CMakeLists.txt                           |  9 ++--
 .../modules/prepare_libc_gpu_build.cmake      | 42 +++++++++++++++----
 libc/utils/CMakeLists.txt                     |  3 --
 libc/utils/gpu/CMakeLists.txt                 |  4 +-
 libc/utils/gpu/loader/CMakeLists.txt          | 34 ++++-----------
 libc/utils/gpu/loader/amdgpu/CMakeLists.txt   |  1 -
 libc/utils/gpu/loader/nvptx/CMakeLists.txt    |  4 +-
 libc/utils/gpu/server/CMakeLists.txt          |  9 ----
 llvm/CMakeLists.txt                           |  4 ++
 llvm/runtimes/CMakeLists.txt                  | 18 +++++---
 10 files changed, 66 insertions(+), 62 deletions(-)

diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt
index b4a2523b778877..6edf5c656193db 100644
--- a/libc/CMakeLists.txt
+++ b/libc/CMakeLists.txt
@@ -60,6 +60,10 @@ if(LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD)
     message(STATUS "Will use ${LIBC_HDRGEN_EXE} for libc header generation.")
   endif()
 endif()
+# We will build the GPU utilities if we are not doing a runtimes build.
+if(LLVM_LIBC_GPU_BUILD AND NOT LLVM_RUNTIMES_BUILD)
+  add_subdirectory(utils/gpu)
+endif()
 
 set(NEED_LIBC_HDRGEN FALSE)
 if(NOT LLVM_RUNTIMES_BUILD)
@@ -79,11 +83,6 @@ if(LIBC_HDRGEN_ONLY OR NEED_LIBC_HDRGEN)
   # When libc is build as part of the runtimes/bootstrap build's CMake run, we
   # only need to build the host tools to build the libc. So, we just do enough
   # to build libc-hdrgen and return.
-
-  # Always make the RPC server availible to other projects for GPU mode.
-  if(LLVM_LIBC_GPU_BUILD)
-    add_subdirectory(utils/gpu/server)
-  endif()
   return()
 endif()
 unset(NEED_LIBC_HDRGEN)
diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake
index 2de4cb8d82b28b..bea6bb016491b6 100644
--- a/libc/cmake/modules/prepare_libc_gpu_build.cmake
+++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake
@@ -93,6 +93,41 @@ else()
 endif()
 set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}")
 
+# Identify the GPU loader utility used to run tests.
+set(LIBC_GPU_LOADER_EXECUTABLE "" CACHE STRING "Executable for the GPU loader.")
+if(LIBC_GPU_LOADER_EXECUTABLE)
+  set(gpu_loader_executable ${LIBC_GPU_LOADER_EXECUTABLE})
+elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+  find_program(LIBC_AMDHSA_LOADER_EXECUTABLE
+               NAMES amdhsa-loader NO_DEFAULT_PATH
+               PATHS ${LLVM_BINARY_DIR}/bin ${compiler_path})
+  if(LIBC_AMDHSA_LOADER_EXECUTABLE)
+    set(gpu_loader_executable ${LIBC_AMDHSA_LOADER_EXECUTABLE})
+  endif()
+elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
+  find_program(LIBC_NVPTX_LOADER_EXECUTABLE
+               NAMES nvptx-loader NO_DEFAULT_PATH
+               PATHS ${LLVM_BINARY_DIR}/bin ${compiler_path})
+  if(LIBC_NVPTX_LOADER_EXECUTABLE)
+    set(gpu_loader_executable ${LIBC_NVPTX_LOADER_EXECUTABLE})
+  endif()
+endif()
+if(NOT TARGET libc.utils.gpu.loader AND gpu_loader_executable)
+  add_custom_target(libc.utils.gpu.loader)
+  set_target_properties(
+    libc.utils.gpu.loader
+    PROPERTIES
+      EXECUTABLE "${gpu_loader_executable}"
+  )
+endif()
+
+if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+  # The AMDGPU environment uses different code objects to encode the ABI for
+  # kernel calls and intrinsic functions. We want to specify this manually to
+  # conform to whatever the test suite was built to handle.
+  set(LIBC_GPU_CODE_OBJECT_VERSION 5)
+endif()
+
 if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
   # FIXME: This is a hack required to keep the CUDA package from trying to find
   #        pthreads. We only link the CUDA driver, so this is unneeded.
@@ -103,10 +138,3 @@ if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
     get_filename_component(LIBC_CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
   endif()
 endif()
-
-if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
-  # The AMDGPU environment uses different code objects to encode the ABI for
-  # kernel calls and intrinsic functions. We want to specify this manually to
-  # conform to whatever the test suite was built to handle.
-  set(LIBC_GPU_CODE_OBJECT_VERSION 5)
-endif()
diff --git a/libc/utils/CMakeLists.txt b/libc/utils/CMakeLists.txt
index 7bf02a4af7deae..11f25503cc13e2 100644
--- a/libc/utils/CMakeLists.txt
+++ b/libc/utils/CMakeLists.txt
@@ -1,6 +1,3 @@
 if(LLVM_INCLUDE_TESTS)
   add_subdirectory(MPFRWrapper)
 endif()
-if(LIBC_TARGET_OS_IS_GPU)
-  add_subdirectory(gpu)
-endif()
diff --git a/libc/utils/gpu/CMakeLists.txt b/libc/utils/gpu/CMakeLists.txt
index 4d1ebcfb9f8e65..7c15f36052cf3b 100644
--- a/libc/utils/gpu/CMakeLists.txt
+++ b/libc/utils/gpu/CMakeLists.txt
@@ -1,4 +1,2 @@
 add_subdirectory(server)
-if(LIBC_TARGET_OS_IS_GPU)
-  add_subdirectory(loader)
-endif()
+add_subdirectory(loader)
diff --git a/libc/utils/gpu/loader/CMakeLists.txt b/libc/utils/gpu/loader/CMakeLists.txt
index 189460bb02e6e5..f755ba75b230c2 100644
--- a/libc/utils/gpu/loader/CMakeLists.txt
+++ b/libc/utils/gpu/loader/CMakeLists.txt
@@ -6,37 +6,18 @@ target_include_directories(gpu_loader PUBLIC
   ${LIBC_SOURCE_DIR}
 )
 
-# This utility needs to be compiled for the host system when cross compiling.
-if(LLVM_RUNTIMES_TARGET OR LIBC_TARGET_TRIPLE)
-  target_compile_options(gpu_loader PUBLIC --target=${LLVM_HOST_TRIPLE})
-  target_link_libraries(gpu_loader PUBLIC "--target=${LLVM_HOST_TRIPLE}")
-endif()
-
 find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
-if(hsa-runtime64_FOUND AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+if(hsa-runtime64_FOUND)
   add_subdirectory(amdgpu)
-elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
-  message(STATUS "Skipping HSA loader for gpu target, no HSA was detected")
 endif()
 
 # The CUDA loader requires LLVM to traverse the ELF image for symbols.
-find_package(LLVM QUIET)
-if(CUDAToolkit_FOUND AND LLVM_FOUND AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
+find_package(CUDAToolkit 11.2 QUIET)
+if(CUDAToolkit_FOUND)
   add_subdirectory(nvptx)
-elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
-  message(STATUS "Skipping CUDA loader for gpu target, no CUDA was detected")
 endif()
 
-# Add a custom target to be used for testing.
-set(LIBC_GPU_LOADER_EXECUTABLE "" CACHE STRING "Overriding binary for the GPU loader.")
-if(LIBC_GPU_LOADER_EXECUTABLE)
-  add_custom_target(libc.utils.gpu.loader)
-  set_target_properties(
-    libc.utils.gpu.loader
-    PROPERTIES
-      EXECUTABLE "${LIBC_GPU_LOADER_EXECUTABLE}"
-  )
-elseif(TARGET amdhsa-loader AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+if(TARGET amdhsa-loader AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
   add_custom_target(libc.utils.gpu.loader)
   add_dependencies(libc.utils.gpu.loader amdhsa-loader)
   set_target_properties(
@@ -56,11 +37,10 @@ elseif(TARGET nvptx-loader AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
   )
 endif()
 
-if(TARGET libc.utils.gpu.loader)
-  get_target_property(gpu_loader_tgt libc.utils.gpu.loader "TARGET")
-  if(gpu_loader_tgt)
+foreach(gpu_loader_tgt amdhsa-loader nvptx-loader)
+  if(TARGET gpu_loader_tgt)
     install(TARGETS ${gpu_loader_tgt}
             DESTINATION ${CMAKE_INSTALL_BINDIR}
             COMPONENT libc)
   endif()
-endif()
+endforeach()
diff --git a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
index b99319f5040112..97a2de9f8379ab 100644
--- a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
+++ b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
@@ -1,5 +1,4 @@
 add_executable(amdhsa-loader Loader.cpp)
-add_dependencies(amdhsa-loader libc.src.__support.RPC.rpc)
 
 target_link_libraries(amdhsa-loader
   PRIVATE
diff --git a/libc/utils/gpu/loader/nvptx/CMakeLists.txt b/libc/utils/gpu/loader/nvptx/CMakeLists.txt
index e76362a1e8cca6..948493959badf2 100644
--- a/libc/utils/gpu/loader/nvptx/CMakeLists.txt
+++ b/libc/utils/gpu/loader/nvptx/CMakeLists.txt
@@ -1,10 +1,10 @@
 add_executable(nvptx-loader Loader.cpp)
-add_dependencies(nvptx-loader libc.src.__support.RPC.rpc)
 
 if(NOT LLVM_ENABLE_RTTI)
   target_compile_options(nvptx-loader PRIVATE -fno-rtti)
 endif()
-target_include_directories(nvptx-loader PRIVATE ${LLVM_INCLUDE_DIRS})
+target_include_directories(nvptx-loader PRIVATE
+                           ${LLVM_MAIN_INCLUDE_DIR} ${LLVM_BINARY_DIR}/include)
 target_link_libraries(nvptx-loader
   PRIVATE
   gpu_loader
diff --git a/libc/utils/gpu/server/CMakeLists.txt b/libc/utils/gpu/server/CMakeLists.txt
index 10cfdb45a2c9d2..6fca72cfae95fb 100644
--- a/libc/utils/gpu/server/CMakeLists.txt
+++ b/libc/utils/gpu/server/CMakeLists.txt
@@ -5,21 +5,12 @@ target_include_directories(llvmlibc_rpc_server PRIVATE ${LIBC_SOURCE_DIR})
 target_include_directories(llvmlibc_rpc_server PUBLIC ${LIBC_SOURCE_DIR}/include)
 target_include_directories(llvmlibc_rpc_server PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
 
-
 # Ignore unsupported clang attributes if we're using GCC.
 target_compile_options(llvmlibc_rpc_server PUBLIC
                        $<$<CXX_COMPILER_ID:GNU>:-Wno-attributes>)
 target_compile_definitions(llvmlibc_rpc_server PUBLIC
                            LIBC_NAMESPACE=${LIBC_NAMESPACE})
 
-# This utility needs to be compiled for the host system when cross compiling.
-if(LLVM_RUNTIMES_TARGET OR LIBC_TARGET_TRIPLE)
-  target_compile_options(llvmlibc_rpc_server PUBLIC
-                         --target=${LLVM_HOST_TRIPLE})
-  target_link_libraries(llvmlibc_rpc_server PUBLIC
-                        "--target=${LLVM_HOST_TRIPLE}")
-endif()
-
 # Install the server and associated header.
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/llvmlibc_rpc_server.h
         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index d0e33c29be58f8..494d8abeb64d21 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -182,6 +182,10 @@ foreach(_name ${LLVM_RUNTIME_TARGETS})
     endif()
   endif()
 endforeach()
+if("${LIBC_TARGET_TRIPLE}" STREQUAL "amdgcn-amd-amdhsa" OR
+   "${LIBC_TARGET_TRIPLE}" STREQUAL "nvptx64-nvidia-cuda")
+  set(LLVM_LIBC_GPU_BUILD ON)
+endif()
 if(NEED_LIBC_HDRGEN)
   # To build the libc runtime, we need to be able to build few libc build
   # tools from the "libc" project. So, we add it to the list of enabled
diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index 3f3c482adccd37..54ec8bd28d4ca6 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -452,20 +452,28 @@ if(runtimes)
   if(LLVM_LIBC_GPU_BUILD)
     list(APPEND libc_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON")
     if("libc" IN_LIST RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES)
+      if(TARGET amdhsa-loader)
+        list(APPEND libc_cmake_args
+             "-DRUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:amdhsa-loader>")
+        list(APPEND extra_deps amdhsa-loader amdgpu-arch)
+      endif()
       list(APPEND libc_cmake_args "-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_LIBC_FULL_BUILD=ON")
     endif()
     if("libc" IN_LIST RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES)
+      if(TARGET nvptx-loader)
+        list(APPEND libc_cmake_args
+             "-DRUNTIMES_nvptx64-nvidia-cuda_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:nvptx-loader>")
+        list(APPEND extra_deps nvptx-loader nvptx-arch)
+      endif()
       list(APPEND libc_cmake_args "-DRUNTIMES_nvptx64-nvidia-cuda_LLVM_LIBC_FULL_BUILD=ON")
     endif()
     # The `libc` project may require '-DCUDAToolkit_ROOT' in GPU mode.
     if(CUDAToolkit_ROOT)
       list(APPEND libc_cmake_args "-DCUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
     endif()
-    foreach(dep clang-offload-packager nvptx-arch amdgpu-arch)
-      if(TARGET ${dep})
-        list(APPEND extra_deps ${dep})
-      endif()
-    endforeach()
+    if(TARGET clang-offload-packager)
+      list(APPEND extra_deps clang-offload-packager)
+    endif()
   endif()
   if(LLVM_LIBC_FULL_BUILD)
     list(APPEND libc_cmake_args "-DLLVM_LIBC_FULL_BUILD=ON")