[Openmp-commits] [openmp] 349c0aa - [OpenMP] Remove 'keep_alive' functionality from the device RTL

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Wed May 31 15:12:52 PDT 2023


Author: Joseph Huber
Date: 2023-05-31T17:12:43-05:00
New Revision: 349c0aacb38072a868421ce7c460514be57a3de7

URL: https://github.com/llvm/llvm-project/commit/349c0aacb38072a868421ce7c460514be57a3de7
DIFF: https://github.com/llvm/llvm-project/commit/349c0aacb38072a868421ce7c460514be57a3de7.diff

LOG: [OpenMP] Remove 'keep_alive' functionality from the device RTL

The OpenMP DeviceRTL used a hacky workaround to keep certain runtime
calls alive: a `__keep_alive` function that prevented them from being
optimized out. We needed this hack because the 'OpenMPOpt' pass likes to
introduce new runtime calls into the TU. This interacted badly with
the method of linking the bitcode file per-TU, as we do for Nvidia:
the OpenMPOpt pass would generate a runtime call to a function that
was never linked in.

This should not be a problem anymore because we unconditionally link in
the `libomptarget.devicertl.a` runtime library. Because it is a static
library, linking it should only extract symbols that are undefined. So,
if we do end up with an unresolved reference, it will be resolved by the
static library.

The downside to this is that if a non-LTO NVPTX compilation introduces
one of these calls, it will be linked outside the module and therefore
incur the overhead of an external function call.
However, removing this workaround should make optimizing things easier.
We will need to see whether the performance cost is a problem.

Reviewed By: ye-luo

Differential Revision: https://reviews.llvm.org/D151324

Added: 
    

Modified: 
    openmp/libomptarget/DeviceRTL/CMakeLists.txt
    openmp/libomptarget/DeviceRTL/src/Utils.cpp
    openmp/libomptarget/DeviceRTL/src/exports

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index 8419b87ef6222..7540a8c4ca0a1 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -31,9 +31,8 @@ if (LLVM_DIR)
   find_program(PACKAGER_TOOL clang-offload-packager PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
   find_program(LINK_TOOL llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
   find_program(OPT_TOOL opt PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
-  find_program(EXTRACT_TOOL llvm-extract PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
-  if ((NOT CLANG_TOOL) OR (NOT LINK_TOOL) OR (NOT OPT_TOOL) OR (NOT EXTRACT_TOOL) OR (NOT PACKAGER_TOOL))
-    libomptarget_say("Not building DeviceRTL. Missing clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL}, opt: ${OPT_TOOL}, llvm-extract: ${EXTRACT_TOOL}, or clang-offload-packager: ${PACKAGER_TOOL}")
+  if ((NOT CLANG_TOOL) OR (NOT LINK_TOOL) OR (NOT OPT_TOOL) OR (NOT PACKAGER_TOOL))
+    libomptarget_say("Not building DeviceRTL. Missing clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL}, opt: ${OPT_TOOL}, or clang-offload-packager: ${PACKAGER_TOOL}")
     return()
   else()
     libomptarget_say("Building DeviceRTL. Using clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL} and opt: ${OPT_TOOL}")
@@ -45,7 +44,6 @@ elseif (LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING AND NOT OPENMP_STANDA
   set(PACKAGER_TOOL $<TARGET_FILE:clang-offload-packager>)
   set(LINK_TOOL $<TARGET_FILE:llvm-link>)
   set(OPT_TOOL $<TARGET_FILE:opt>)
-  set(EXTRACT_TOOL $<TARGET_FILE:llvm-extract>)
   libomptarget_say("Building DeviceRTL. Using clang from in-tree build")
 else()
   libomptarget_say("Not building DeviceRTL. No appropriate clang found")
@@ -114,7 +112,6 @@ set(src_files
 set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512)
 set(link_opt_flags  -O3        -openmp-opt-disable -attributor-enable=module)
 set(link_export_flag -passes=internalize -internalize-public-api-file=${source_directory}/exports)
-set(link_extract_flag --func='__keep_alive' --delete)
 
 # Prepend -I to each list element
 set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}")
@@ -220,18 +217,6 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
   # Install bitcode library under the lib destination folder.
   install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DESTINATION "${OPENMP_INSTALL_LIBDIR}")
 
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
-      COMMAND ${EXTRACT_TOOL} ${link_extract_flag} ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
-                      -o ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
-      DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} ${bclib_target_name}
-      COMMENT "Extracting LLVM bitcode ${bclib_name}"
-  )
-  if("${EXTRACT_TOOL}" STREQUAL "$<TARGET_FILE:llvm-extract>")
-    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
-      DEPENDS llvm-extract
-      APPEND)
-  endif()
-
   set(target_feature "")
   if("${target_triple}" STREQUAL "nvptx64-nvidia-cuda")
     set(target_feature "feature=+ptx61")
@@ -240,8 +225,8 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
   # Package the bitcode in the bitcode and embed it in an ELF for the static library
   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
       COMMAND ${PACKAGER_TOOL} -o ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
-        "--image=file=${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name},${target_feature},triple=${target_triple},arch=${target_cpu},kind=openmp"
-      DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
+        "--image=file=${CMAKE_CURRENT_BINARY_DIR}/${bclib_name},${target_feature},triple=${target_triple},arch=${target_cpu},kind=openmp"
+      DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
       COMMENT "Packaging LLVM offloading binary ${bclib_name}.out"
   )
   if("${PACKAGER_TOOL}" STREQUAL "$<TARGET_FILE:clang-offload-packager>")

diff  --git a/openmp/libomptarget/DeviceRTL/src/Utils.cpp b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
index 41a919dc74c3c..d74f7e069cf67 100644
--- a/openmp/libomptarget/DeviceRTL/src/Utils.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
@@ -21,16 +21,6 @@ using namespace ompx;
 
 extern "C" __attribute__((weak)) int IsSPMDMode;
 
-/// Helper to keep code alive without introducing a performance penalty.
-extern "C" __attribute__((weak, optnone, cold, used, retain)) void
-__keep_alive() {
-  __kmpc_get_hardware_thread_id_in_block();
-  __kmpc_get_hardware_num_threads_in_block();
-  __kmpc_get_warp_size();
-  __kmpc_barrier_simple_spmd(nullptr, IsSPMDMode);
-  __kmpc_barrier_simple_generic(nullptr, IsSPMDMode);
-}
-
 namespace impl {
 
 bool isSharedMemPtr(const void *Ptr) { return false; }

diff  --git a/openmp/libomptarget/DeviceRTL/src/exports b/openmp/libomptarget/DeviceRTL/src/exports
index 0a23157cbb098..85fd459fee1b1 100644
--- a/openmp/libomptarget/DeviceRTL/src/exports
+++ b/openmp/libomptarget/DeviceRTL/src/exports
@@ -4,7 +4,6 @@ __kmpc_*
 
 _ZN4ompx*
 
-__keep_alive
 IsSPMDMode
 
 memcmp


        


More information about the Openmp-commits mailing list