[libc-commits] [libc] d51d2b5 - [libc] Support add_object_library for the GPU build
Joseph Huber via libc-commits
libc-commits at lists.llvm.org
Mon Feb 20 07:37:27 PST 2023
Author: Joseph Huber
Date: 2023-02-20T09:37:18-06:00
New Revision: d51d2b5909036cd13fcca7d5b5d1bed48dff3053
URL: https://github.com/llvm/llvm-project/commit/d51d2b5909036cd13fcca7d5b5d1bed48dff3053
DIFF: https://github.com/llvm/llvm-project/commit/d51d2b5909036cd13fcca7d5b5d1bed48dff3053.diff
LOG: [libc] Support add_object_library for the GPU build
This patch unifies the handling of generating the GPU build targets
between the `add_entrypoint_library` and the `add_object_library`
functions. The `_build_gpu_objects` function will create two targets.
One contains a single object file with several GPU binaries embedded in
it, a so-called fatbinary. The other is a direct compile of the
supported target to be used internally only. This patch pulls out some
of the properties logic so that we can handle both more easily. This
patch also required adding an ovverride `NO_GPU_BUILD` for cases when
we only want to build the source file as normal.
Reviewed By: sivachandra
Differential Revision: https://reviews.llvm.org/D144214
Added:
Modified:
libc/cmake/modules/LLVMLibCObjectRules.cmake
libc/startup/gpu/amdgpu/CMakeLists.txt
libc/startup/gpu/nvptx/CMakeLists.txt
Removed:
################################################################################
diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
index dc5b6f28a7b18..bdd600df4de13 100644
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -59,19 +59,23 @@ function(_get_common_compile_options output_var flags)
set(${output_var} ${compile_options} PARENT_SCOPE)
endfunction()
-# Builds the entrypoint target for the GPU.
+# Builds the object target for the GPU.
+# This compiles the target for all supported architectures and embeds it into
+# host binary for installing. The internal target contains the GPU code directly
+# compiled for a single architecture used internally.
# Usage:
-# _build_gpu_entrypoint_objects(
+# _build_gpu_objects(
# <target_name>
+# <internal_target_name>
# SRCS <list of .cpp files>
# HDRS <list of .h files>
# DEPENDS <list of dependencies>
# COMPILE_OPTIONS <optional list of special compile options for this target>
# FLAGS <optional list of flags>
# )
-function(_build_gpu_entrypoint_objects fq_target_name)
+function(_build_gpu_objects fq_target_name internal_target_name)
cmake_parse_arguments(
- "ADD_GPU_ENTRYPOINT_OBJ"
+ "ADD_GPU_OBJ"
"" # No optional arguments
"NAME;CXX_STANDARD" # Single value arguments
"SRCS;HDRS;DEPENDS;COMPILE_OPTIONS;FLAGS" # Multi value arguments
@@ -82,7 +86,7 @@ function(_build_gpu_entrypoint_objects fq_target_name)
# this so we can support multiple accelerators on the same machine.
foreach(gpu_arch ${all_gpu_architectures})
set(gpu_target_name ${fq_target_name}.${gpu_arch})
- set(compile_options ${ADD_GPU_ENTRYPOINT_OBJ_COMPILE_OPTIONS})
+ set(compile_options ${ADD_GPU_OBJ_COMPILE_OPTIONS})
# Derive the triple from the specified architecture.
if("${gpu_arch}" IN_LIST all_amdgpu_architectures)
set(gpu_target_triple "amdgcn-amd-amdhsa")
@@ -101,14 +105,16 @@ function(_build_gpu_entrypoint_objects fq_target_name)
add_library(${gpu_target_name}
EXCLUDE_FROM_ALL
OBJECT
- ${ADD_GPU_ENTRYPOINT_OBJ_SRCS}
- ${ADD_GPU_ENTRYPOINT_OBJ_HDRS}
+ ${ADD_GPU_OBJ_SRCS}
+ ${ADD_GPU_OBJ_HDRS}
)
target_compile_options(${gpu_target_name} PRIVATE ${compile_options})
target_include_directories(${gpu_target_name} PRIVATE ${include_dirs})
- add_dependencies(${gpu_target_name} ${ADD_GPU_ENTRYPOINT_OBJ_DEPENDS})
target_compile_definitions(${gpu_target_name} PRIVATE LIBC_COPT_PUBLIC_PACKAGING)
+ if(ADD_GPU_OBJ_DEPENDS)
+ add_dependencies(${gpu_target_name} ${ADD_GPU_OBJ_DEPENDS})
+ endif()
# Append this target to a list of images to package into a single binary.
set(input_file $<TARGET_OBJECTS:${gpu_target_name}>)
@@ -135,7 +141,7 @@ function(_build_gpu_entrypoint_objects fq_target_name)
# TODO: In the future we will want to combine every architecture for a target
# into a single bitcode file and use that. For now we simply build for
# every single one and let the offloading linker handle it.
- get_filename_component(stub_filename ${ADD_GPU_ENTRYPOINT_OBJ_SRCS} NAME)
+ get_filename_component(stub_filename ${ADD_GPU_OBJ_SRCS} NAME)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stubs)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename} "// Empty file.\n")
add_library(
@@ -151,19 +157,7 @@ function(_build_gpu_entrypoint_objects fq_target_name)
target_include_directories(${fq_target_name} PRIVATE ${include_dirs})
add_dependencies(${fq_target_name} ${full_deps_list} ${packaged_target_name})
- set_target_properties(
- ${fq_target_name}
- PROPERTIES
- ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME}
- TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE}
- OBJECT_FILE "$<TARGET_OBJECTS:${fq_target_name}>"
- CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
- DEPS "${fq_deps_list}"
- FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
- )
-
# We only build the internal target for a single supported architecture.
- set(internal_target_name ${fq_target_name}.__internal__)
set(include_dirs ${LIBC_BUILD_DIR}/include ${LIBC_SOURCE_DIR} ${LIBC_BUILD_DIR})
if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU OR
LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
@@ -171,8 +165,8 @@ function(_build_gpu_entrypoint_objects fq_target_name)
${internal_target_name}
EXCLUDE_FROM_ALL
OBJECT
- ${ADD_ENTRYPOINT_OBJ_SRCS}
- ${ADD_ENTRYPOINT_OBJ_HDRS}
+ ${ADD_GPU_OBJ_SRCS}
+ ${ADD_GPU_OBJ_HDRS}
)
target_compile_options(${internal_target_name} BEFORE PRIVATE
${common_compile_options} --target=${LIBC_GPU_TARGET_TRIPLE})
@@ -182,17 +176,9 @@ function(_build_gpu_entrypoint_objects fq_target_name)
target_compile_options(${internal_target_name} PRIVATE -march=${LIBC_GPU_TARGET_ARCHITECTURE})
endif()
target_include_directories(${internal_target_name} PRIVATE ${include_dirs})
- add_dependencies(${internal_target_name} ${full_deps_list})
- set_target_properties(
- ${internal_target_name}
- PROPERTIES
- CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
- FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
- )
- set_target_properties(
- ${fq_target_name}
- PROPERTIES OBJECT_FILE_RAW "$<TARGET_OBJECTS:${internal_target_name}>"
- )
+ if(full_deps_list)
+ add_dependencies(${internal_target_name} ${full_deps_list})
+ endif()
endif()
endfunction()
@@ -209,7 +195,7 @@ endfunction()
function(create_object_library fq_target_name)
cmake_parse_arguments(
"ADD_OBJECT"
- "" # No optional arguments
+ "NO_GPU_BUNDLE" # No optional arguments
"CXX_STANDARD" # Single value arguments
"SRCS;HDRS;COMPILE_OPTIONS;DEPENDS;FLAGS" # Multivalue arguments
${ARGN}
@@ -219,28 +205,49 @@ function(create_object_library fq_target_name)
message(FATAL_ERROR "'add_object_library' rule requires SRCS to be specified.")
endif()
- add_library(
- ${fq_target_name}
- EXCLUDE_FROM_ALL
- OBJECT
- ${ADD_OBJECT_SRCS}
- ${ADD_OBJECT_HDRS}
- )
- target_include_directories(
- ${fq_target_name}
- PRIVATE
- ${LIBC_BUILD_DIR}/include
- ${LIBC_SOURCE_DIR}
- ${LIBC_BUILD_DIR}
- )
+ # The GPU build uses a separate internal file.
+ if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND NOT ${ADD_OBJECT_NO_GPU_BUNDLE})
+ set(internal_target_name ${fq_target_name}.__internal__)
+ else()
+ set(internal_target_name ${fq_target_name})
+ endif()
+
+ get_fq_deps_list(fq_deps_list ${ADD_OBJECT_DEPENDS})
_get_common_compile_options(
compile_options
"${ADD_OBJECT_FLAGS}"
${ADD_OBJECT_COMPILE_OPTIONS}
)
- target_compile_options(${fq_target_name} PRIVATE ${compile_options})
- get_fq_deps_list(fq_deps_list ${ADD_OBJECT_DEPENDS})
+ # GPU builds require special handling for the objects because we want to
+ # export several
diff erent targets at once, e.g. for both Nvidia and AMD.
+ if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND NOT ${ADD_OBJECT_NO_GPU_BUNDLE})
+ _build_gpu_objects(
+ ${fq_target_name}
+ ${internal_target_name}
+ SRCS ${ADD_OBJECT_SRCS}
+ HDRS ${ADD_OBJECT_HDRS}
+ DEPENDS ${fq_deps_list}
+ COMPILE_OPTIONS ${common_compile_options}
+ FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
+ )
+ else()
+ add_library(
+ ${fq_target_name}
+ EXCLUDE_FROM_ALL
+ OBJECT
+ ${ADD_OBJECT_SRCS}
+ ${ADD_OBJECT_HDRS}
+ )
+ target_include_directories(
+ ${fq_target_name}
+ PRIVATE
+ ${LIBC_BUILD_DIR}/include
+ ${LIBC_SOURCE_DIR}
+ ${LIBC_BUILD_DIR}
+ )
+ target_compile_options(${fq_target_name} PRIVATE ${compile_options})
+ endif()
if(SHOW_INTERMEDIATE_OBJECTS)
message(STATUS "Adding object library ${fq_target_name}")
@@ -262,11 +269,18 @@ function(create_object_library fq_target_name)
${fq_target_name}
PROPERTIES
TARGET_TYPE ${OBJECT_LIBRARY_TARGET_TYPE}
- OBJECT_FILES "$<TARGET_OBJECTS:${fq_target_name}>"
CXX_STANDARD ${ADD_OBJECT_CXX_STANDARD}
DEPS "${fq_deps_list}"
FLAGS "${ADD_OBJECT_FLAGS}"
)
+
+ if(TARGET ${internal_target_name})
+ set_target_properties(
+ ${fq_target_name}
+ PROPERTIES
+ OBJECT_FILES "$<TARGET_OBJECTS:${internal_target_name}>"
+ )
+ endif()
endfunction(create_object_library)
# Internal function, used by `add_object_library`.
@@ -483,13 +497,13 @@ function(create_entrypoint_object fq_target_name)
# GPU builds require special handling for the objects because we want to
# export several
diff erent targets at once, e.g. for both Nvidia and AMD.
if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
- _build_gpu_entrypoint_objects(
+ _build_gpu_objects(
${fq_target_name}
+ ${internal_target_name}
SRCS ${ADD_ENTRYPOINT_OBJ_SRCS}
HDRS ${ADD_ENTRYPOINT_OBJ_HDRS}
COMPILE_OPTIONS ${common_compile_options}
DEPENDS ${full_deps_list}
- CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
)
else()
@@ -505,12 +519,6 @@ function(create_entrypoint_object fq_target_name)
target_compile_options(${internal_target_name} BEFORE PRIVATE ${common_compile_options})
target_include_directories(${internal_target_name} PRIVATE ${include_dirs})
add_dependencies(${internal_target_name} ${full_deps_list})
- set_target_properties(
- ${internal_target_name}
- PROPERTIES
- CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
- FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
- )
add_library(
${fq_target_name}
@@ -524,22 +532,36 @@ function(create_entrypoint_object fq_target_name)
target_compile_options(${fq_target_name} BEFORE PRIVATE ${common_compile_options} -DLIBC_COPT_PUBLIC_PACKAGING)
target_include_directories(${fq_target_name} PRIVATE ${include_dirs})
add_dependencies(${fq_target_name} ${full_deps_list})
+ endif()
+
+ set_target_properties(
+ ${fq_target_name}
+ PROPERTIES
+ ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME}
+ TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE}
+ OBJECT_FILE "$<TARGET_OBJECTS:${fq_target_name}>"
+ CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
+ DEPS "${fq_deps_list}"
+ FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
+ )
+ if(TARGET ${internal_target_name})
+ set_target_properties(
+ ${internal_target_name}
+ PROPERTIES
+ CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
+ FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
+ )
set_target_properties(
${fq_target_name}
PROPERTIES
- ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME}
- TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE}
- OBJECT_FILE "$<TARGET_OBJECTS:${fq_target_name}>"
# TODO: We don't need to list internal object files if the internal
# target is a normal static library.
OBJECT_FILE_RAW "$<TARGET_OBJECTS:${internal_target_name}>"
- CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
- DEPS "${fq_deps_list}"
- FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
)
endif()
+
if(LLVM_LIBC_ENABLE_LINTING AND TARGET ${internal_target_name})
if(NOT LLVM_LIBC_CLANG_TIDY)
message(FATAL_ERROR "Something is wrong! LLVM_LIBC_ENABLE_LINTING is "
diff --git a/libc/startup/gpu/amdgpu/CMakeLists.txt b/libc/startup/gpu/amdgpu/CMakeLists.txt
index 8de0955d9ab3f..c2ae950eaafcb 100644
--- a/libc/startup/gpu/amdgpu/CMakeLists.txt
+++ b/libc/startup/gpu/amdgpu/CMakeLists.txt
@@ -10,6 +10,7 @@ add_startup_object(
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE}
-emit-llvm # AMDGPU's intermediate object file format is bitcode.
--target=${LIBC_GPU_TARGET_TRIPLE}
+ NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
)
get_fq_target_name(crt1 fq_name)
diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt
index 2057fcfd84d2f..f7f58ec702bf2 100644
--- a/libc/startup/gpu/nvptx/CMakeLists.txt
+++ b/libc/startup/gpu/nvptx/CMakeLists.txt
@@ -10,6 +10,7 @@ add_startup_object(
-x cuda # Use the CUDA toolchain to emit the `_start` kernel.
--offload-device-only
--offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE}
+ NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
)
get_fq_target_name(crt1 fq_name)
More information about the libc-commits
mailing list