[libc-commits] [libc] 550b83d - Revert "[libc] Remove 'packaged' GPU build support (#100208)"
Joseph Huber via libc-commits
libc-commits at lists.llvm.org
Wed Jul 24 05:52:22 PDT 2024
Author: Joseph Huber
Date: 2024-07-24T07:51:47-05:00
New Revision: 550b83d658755664a7f0f93b36242e885743a91b
URL: https://github.com/llvm/llvm-project/commit/550b83d658755664a7f0f93b36242e885743a91b
DIFF: https://github.com/llvm/llvm-project/commit/550b83d658755664a7f0f93b36242e885743a91b.diff
LOG: Revert "[libc] Remove 'packaged' GPU build support (#100208)"
Summary:
I forgot that the OpenMP tests still look for this, reverting for now
until I can make a fix.
This reverts commit c1c6ed83e9ac13c511961e5f5791034a63168e7e.
Added:
Modified:
libc/cmake/modules/LLVMLibCLibraryRules.cmake
libc/docs/gpu/building.rst
libc/docs/gpu/using.rst
libc/lib/CMakeLists.txt
Removed:
################################################################################
diff --git a/libc/cmake/modules/LLVMLibCLibraryRules.cmake b/libc/cmake/modules/LLVMLibCLibraryRules.cmake
index e677b4cd2c28f..75bc81e2aee8e 100644
--- a/libc/cmake/modules/LLVMLibCLibraryRules.cmake
+++ b/libc/cmake/modules/LLVMLibCLibraryRules.cmake
@@ -83,6 +83,97 @@ function(get_all_object_file_deps result fq_deps_list)
set(${result} ${all_deps} PARENT_SCOPE)
endfunction()
+# A rule to build a static library in which every entrypoint object is wrapped
+# in a GPU fat binary. '<base_target_name>' is built before each packaged object.
+# Usage:
+#     add_gpu_entrypoint_library(<target_name> <base_target_name>
+#       DEPENDS <list of add_entrypoint_object targets>
+#     )
+function(add_gpu_entrypoint_library target_name base_target_name)
+ cmake_parse_arguments(
+ "ENTRYPOINT_LIBRARY"
+ "" # No optional arguments
+ "" # No single value arguments
+ "DEPENDS" # Multi-value arguments
+ ${ARGN}
+ )
+ if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
+ message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list "
+ "of 'add_entrypoint_object' targets.")
+ endif()
+
+ get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
+ get_all_object_file_deps(all_deps "${fq_deps_list}")
+
+ # The GPU 'libc' needs to be exported in a format that can be linked with
+ # offloading languages like OpenMP or CUDA. This wraps every GPU object into a
+ # fat binary and adds each one to the static library created after the loop.
+ set(objects "")
+ foreach(dep IN LISTS all_deps)
+ set(object $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>)
+ string(FIND ${dep} "." last_dot_loc REVERSE)
+ math(EXPR name_loc "${last_dot_loc} + 1")
+ string(SUBSTRING ${dep} ${name_loc} -1 name) # Text after the last '.' in the target name.
+ if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
+ set(prefix --image=arch=generic,triple=nvptx64-nvidia-cuda,feature=+ptx63)
+ elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+ set(prefix --image=arch=generic,triple=amdgcn-amd-amdhsa)
+ endif()
+
+ # Use the 'clang-offload-packager' to merge these files into a binary blob.
+ add_custom_command(
+ OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin"
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary
+ COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER}
+ "${prefix},file=$<JOIN:${object},,file=>" -o
+ ${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin
+ DEPENDS ${dep} ${base_target_name}
+ COMMENT "Packaging LLVM offloading binary for '${object}'"
+ )
+ add_custom_target(${dep}.__gpubin__ DEPENDS ${dep}
+ "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
+ if(TARGET clang-offload-packager)
+ add_dependencies(${dep}.__gpubin__ clang-offload-packager)
+ endif()
+
+ # CMake does not permit setting the name on object files. In order to have
+ # human readable names we create an empty stub file with the entrypoint
+ # name. This empty file will then have the created binary blob embedded.
+ add_custom_command(
+ OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp"
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs
+ COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp
+ DEPENDS ${dep} ${dep}.__gpubin__ ${base_target_name}
+ )
+ add_custom_target(${dep}.__stub__
+ DEPENDS ${dep}.__gpubin__ "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp")
+
+ add_library(${dep}.__fatbin__
+ EXCLUDE_FROM_ALL OBJECT
+ "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp"
+ )
+
+ # This is always compiled for the LLVM host triple instead of the native GPU
+ # triple that is used by default in the build.
+ target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib)
+ target_compile_options(${dep}.__fatbin__ PRIVATE
+ --target=${LLVM_HOST_TRIPLE}
+ "SHELL:-Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
+ add_dependencies(${dep}.__fatbin__
+ ${dep} ${dep}.__stub__ ${dep}.__gpubin__ ${base_target_name})
+
+ # Append the newly created fat binary object, which embeds the device code.
+ list(APPEND objects $<TARGET_OBJECTS:${dep}.__fatbin__>)
+ endforeach()
+
+ add_library(
+ ${target_name}
+ STATIC
+ ${objects}
+ )
+ set_target_properties(${target_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR})
+endfunction(add_gpu_entrypoint_library)
+
# A rule to build a library from a collection of entrypoint objects and bundle
# it in a single LLVM-IR bitcode file.
# Usage:
diff --git a/libc/docs/gpu/building.rst b/libc/docs/gpu/building.rst
index 60498e348395a..d3e64c6d42431 100644
--- a/libc/docs/gpu/building.rst
+++ b/libc/docs/gpu/building.rst
@@ -151,6 +151,25 @@ Build overview
Once installed, the GPU build will create several files used for different
targets. This section will briefly describe their purpose.
+**lib/<host-triple>/libcgpu-amdgpu.a or lib/libcgpu-amdgpu.a**
+ A static library containing fat binaries supporting AMD GPUs. These are built
+ using the support described in the `clang documentation
+ <https://clang.llvm.org/docs/OffloadingDesign.html>`_. These are intended to
+ be static libraries included natively for offloading languages like CUDA, HIP,
+ or OpenMP. This implements the standard C library.
+
+**lib/<host-triple>/libmgpu-amdgpu.a or lib/libmgpu-amdgpu.a**
+ A static library containing fat binaries that implement the standard math
+ library for AMD GPUs.
+
+**lib/<host-triple>/libcgpu-nvptx.a or lib/libcgpu-nvptx.a**
+ A static library containing fat binaries that implement the standard C library
+ for NVIDIA GPUs.
+
+**lib/<host-triple>/libmgpu-nvptx.a or lib/libmgpu-nvptx.a**
+ A static library containing fat binaries that implement the standard math
+ library for NVIDIA GPUs.
+
**include/<target-triple>**
The include directory where all of the generated headers for the target will
go. These definitions are strictly for the GPU when being targeted directly.
diff --git a/libc/docs/gpu/using.rst b/libc/docs/gpu/using.rst
index 4034c04867c99..d5ad4c7a0368d 100644
--- a/libc/docs/gpu/using.rst
+++ b/libc/docs/gpu/using.rst
@@ -34,17 +34,16 @@ described in the `clang documentation
by the OpenMP toolchain, but is currently opt-in for the CUDA and HIP toolchains
through the ``--offload-new-driver``` and ``-fgpu-rdc`` flags.
-In order or link the GPU runtime, we simply pass this library to the embedded
-device linker job. This can be done using the ``-Xoffload-linker`` option, which
-forwards an argument to a ``clang`` job used to create the final GPU executable.
-The toolchain should pick up the C libraries automatically in most cases, so
-this shouldn't be necessary.
+The installation should contain a static library called ``libcgpu-amdgpu.a`` or
+``libcgpu-nvptx.a`` depending on which GPU architectures your build targeted.
+These contain fat binaries compatible with the offloading toolchain such that
+they can be used directly.
.. code-block:: sh
- $> clang openmp.c -fopenmp --offload-arch=gfx90a -Xoffload-linker -lc
- $> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
- $> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
+ $> clang openmp.c -fopenmp --offload-arch=gfx90a -lcgpu-amdgpu
+ $> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -lcgpu-nvptx
+ $> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -lcgpu-amdgpu
This will automatically link in the needed function definitions if they were
required by the user's application. Normally using the ``-fgpu-rdc`` option
diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt
index 4b7cfc4b76e2e..37acf3950b460 100644
--- a/libc/lib/CMakeLists.txt
+++ b/libc/lib/CMakeLists.txt
@@ -40,6 +40,20 @@ foreach(archive IN ZIP_LISTS
# Add the offloading version of the library for offloading languages. These
# are installed in the standard search path separate from the other libraries.
if(LIBC_TARGET_OS_IS_GPU)
+ add_gpu_entrypoint_library(
+ ${archive_1}gpu
+ ${archive_1}
+ DEPENDS
+ ${${archive_2}}
+ )
+ set_target_properties(
+ ${archive_1}gpu
+ PROPERTIES
+ ARCHIVE_OUTPUT_NAME ${archive_0}gpu-${LIBC_TARGET_ARCHITECTURE}
+ ARCHIVE_OUTPUT_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}
+ )
+ list(APPEND added_gpu_archive_targets ${archive_1}gpu)
+
add_bitcode_entrypoint_library(
${archive_1}bitcode
${archive_1}
@@ -51,6 +65,7 @@ foreach(archive IN ZIP_LISTS
PROPERTIES
OUTPUT_NAME ${archive_1}.bc
)
+ add_dependencies(${archive_1}gpu ${archive_1}bitcode)
list(APPEND added_gpu_bitcode_targets ${archive_1}bitcode)
endif()
endforeach()
More information about the libc-commits
mailing list