[llvm-branch-commits] [openmp] 763c1f9 - [OpenMP] Drop the static library libomptarget-nvptx

Thu Jan 14 10:42:29 PST 2021

Author: Shilei Tian
Date: 2021-01-14T13:34:25-05:00
New Revision: 763c1f9933463c40c39c04b68bbe4d296823b003

URL: https://github.com/llvm/llvm-project/commit/763c1f9933463c40c39c04b68bbe4d296823b003
DIFF: https://github.com/llvm/llvm-project/commit/763c1f9933463c40c39c04b68bbe4d296823b003.diff

LOG: [OpenMP] Drop the static library libomptarget-nvptx

For NVPTX target, OpenMP provides a static library `libomptarget-nvptx`
built by NVCC, and a bitcode library `libomptarget-nvptx-sm_${sm}.bc` generated by
Clang. When compiling an OpenMP program, the `.bc` file will be fed to `clang`
in the second run on the program that compiles the target part. Then the generated
PTX file will be fed to `ptxas` to generate the object file, and finally the driver
invokes `nvlink` to generate the binary, where the static library will be appended
to `nvlink`.

One question is, why do we need two libraries? The only difference is, the static
library contains `omp_data.cu` and the bitcode library doesn't. It's unclear why
they were implemented in this way, but per D94565, there is no issue if we also
include the file into the bitcode library. Therefore, we can safely drop the
static library.

This patch is about the change in OpenMP. The driver will be updated as well if
this patch is accepted.

Reviewed By: jdoerfert, JonChesterfield

Differential Revision: https://reviews.llvm.org/D94573

Added: 
    

Modified: 
    openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index ea11c8114166..200c6401d628 100644

--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -10,31 +10,6 @@
 #
 ##===----------------------------------------------------------------------===##
 
-set(LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER "" CACHE STRING
-  "Path to alternate NVCC host compiler to be used by the NVPTX device RTL.")
-
-if(LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER)
-  find_program(ALTERNATE_CUDA_HOST_COMPILER NAMES ${LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER})
-  if(NOT ALTERNATE_CUDA_HOST_COMPILER)
-    libomptarget_say("Not building CUDA offloading device RTL: invalid NVPTX alternate host compiler.")
-  endif()
-  set(CUDA_HOST_COMPILER ${ALTERNATE_CUDA_HOST_COMPILER} CACHE FILEPATH "" FORCE)
-endif()
-
-# We can't use clang as nvcc host preprocessor, so we attempt to replace it with
-# gcc.
-if(CUDA_HOST_COMPILER MATCHES clang)
-
-  find_program(LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER NAMES gcc)
-
-  if(NOT LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER)
-    libomptarget_say("Not building CUDA offloading device RTL: clang is not supported as NVCC host compiler.")
-    libomptarget_say("Please include gcc in your path or set LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER to the full path of of valid compiler.")
-    return()
-  endif()
-  set(CUDA_HOST_COMPILER "${LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER}" CACHE FILEPATH "" FORCE)
-endif()
-
 get_filename_component(devicertl_base_directory
   ${CMAKE_CURRENT_SOURCE_DIR}
   DIRECTORY)
@@ -44,28 +19,6 @@ set(devicertl_nvptx_directory
   ${devicertl_base_directory}/nvptx)
 
 if(LIBOMPTARGET_DEP_CUDA_FOUND)
-  libomptarget_say("Building CUDA offloading device RTL.")
-
-  # We really don't have any host code, so we don't need to care about
-  # propagating host flags.
-  set(CUDA_PROPAGATE_HOST_FLAGS OFF)
-
-  set(cuda_src_files
-      ${devicertl_common_directory}/src/cancel.cu
-      ${devicertl_common_directory}/src/critical.cu
-      ${devicertl_common_directory}/src/data_sharing.cu
-      ${devicertl_common_directory}/src/libcall.cu
-      ${devicertl_common_directory}/src/loop.cu
-      ${devicertl_common_directory}/src/omp_data.cu
-      ${devicertl_common_directory}/src/omptarget.cu
-      ${devicertl_common_directory}/src/parallel.cu
-      ${devicertl_common_directory}/src/reduction.cu
-      ${devicertl_common_directory}/src/support.cu
-      ${devicertl_common_directory}/src/sync.cu
-      ${devicertl_common_directory}/src/task.cu
-      src/target_impl.cu
-  )
-
   # Build library support for the highest compute capability the system supports
   # and always build support for sm_35 by default
   if (${LIBOMPTARGET_DEP_CUDA_ARCH} EQUAL 35)
@@ -94,24 +47,6 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
   # Activate RTL message dumps if requested by the user.
   set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
     "Activate NVPTX device RTL debug messages.")
-  if(${LIBOMPTARGET_NVPTX_DEBUG})
-    set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1 -g --ptxas-options=-v)
-  endif()
-
-  # NVPTX runtime library has to be statically linked. Dynamic linking is not
-  # yet supported by the CUDA toolchain on the device.
-  set(BUILD_SHARED_LIBS OFF)
-  set(CUDA_SEPARABLE_COMPILATION ON)
-  list(APPEND CUDA_NVCC_FLAGS -I${devicertl_base_directory}
-                              -I${devicertl_nvptx_directory}/src)
-  cuda_add_library(omptarget-nvptx STATIC ${cuda_src_files}
-      OPTIONS ${CUDA_ARCH} ${CUDA_DEBUG} ${MAX_SM_DEFINITION})
-
-  # Install device RTL under the lib destination folder.
-  install(TARGETS omptarget-nvptx ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-
-  target_link_libraries(omptarget-nvptx ${CUDA_LIBRARIES})
-
 
   # Check if we can create an LLVM bitcode implementation of the runtime library
   # that could be inlined in the user application. For that we need to find
@@ -124,18 +59,25 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
 
   include(LibomptargetNVPTXBitcodeLibrary)
 
-  set(bclib_default FALSE)
-  if (${LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED})
-    set(bclib_default TRUE)
-  endif()
-  set(LIBOMPTARGET_NVPTX_ENABLE_BCLIB ${bclib_default} CACHE BOOL
-    "Enable CUDA LLVM bitcode offloading device RTL.")
-  if (${LIBOMPTARGET_NVPTX_ENABLE_BCLIB})
-    if (NOT ${LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED})
-      libomptarget_error_say("Cannot build CUDA LLVM bitcode offloading device RTL!")
-    endif()
+  if (LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED)
     libomptarget_say("Building CUDA LLVM bitcode offloading device RTL.")
 
+    set(cuda_src_files
+      ${devicertl_common_directory}/src/cancel.cu
+      ${devicertl_common_directory}/src/critical.cu
+      ${devicertl_common_directory}/src/data_sharing.cu
+      ${devicertl_common_directory}/src/libcall.cu
+      ${devicertl_common_directory}/src/loop.cu
+      ${devicertl_common_directory}/src/omp_data.cu
+      ${devicertl_common_directory}/src/omptarget.cu
+      ${devicertl_common_directory}/src/parallel.cu
+      ${devicertl_common_directory}/src/reduction.cu
+      ${devicertl_common_directory}/src/support.cu
+      ${devicertl_common_directory}/src/sync.cu
+      ${devicertl_common_directory}/src/task.cu
+      src/target_impl.cu
+    )
+
     # Set flags for LLVM Bitcode compilation.
     set(bc_flags ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS}
                  -I${devicertl_base_directory}
@@ -195,7 +137,7 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
       # Copy library to destination.
       add_custom_command(TARGET omptarget-nvptx-${sm}-bc POST_BUILD
                          COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
-                         $<TARGET_FILE_DIR:omptarget-nvptx>)
+                         ${LIBOMPTARGET_LIBRARY_DIR})
 
       # Install bitcode library under the lib destination folder.
       install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc DESTINATION "${OPENMP_INSTALL_LIBDIR}")
@@ -204,5 +146,5 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
 
   add_subdirectory(test)
 else()
-  libomptarget_say("Not building CUDA offloading device RTL: CUDA tools not found in the system.")
+  libomptarget_say("Not building CUDA offloading device RTL: tools to build bc lib not found in the system.")
 endif()